diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -688,6 +688,9 @@
   /// would typically be allowed using throughput or size cost models.
   bool hasDivRemOp(Type *DataType, bool IsSigned) const;
 
+  /// Returns the maximum bitwidth of legal div and rem instructions.
+  unsigned maxLegalDivRemBitWidth() const;
+
   /// Return true if the given instruction (assumed to be a memory access
   /// instruction) has a volatile variant. If that's the case then we can avoid
   /// addrspacecast to generic AS for volatile loads/stores. Default
@@ -1583,6 +1586,7 @@
   virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
   virtual bool enableOrderedReductions() = 0;
   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
+  virtual unsigned maxLegalDivRemBitWidth() = 0;
   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
   virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
@@ -2012,6 +2016,9 @@
   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
     return Impl.hasDivRemOp(DataType, IsSigned);
   }
+  unsigned maxLegalDivRemBitWidth() override {
+    return Impl.maxLegalDivRemBitWidth();
+  }
   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
     return Impl.hasVolatileVariant(I, AddrSpace);
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -285,6 +285,10 @@
 
   bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
 
+  unsigned maxLegalDivRemBitWidth() const {
+    return llvm::IntegerType::MAX_INT_BITS;
+  }
+
   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
     return false;
   }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -438,6 +438,10 @@
   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
 
+unsigned TargetTransformInfo::maxLegalDivRemBitWidth() const {
+  return TTIImpl->maxLegalDivRemBitWidth();
+}
+
 bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                              unsigned AddrSpace) const {
   return TTIImpl->hasVolatileVariant(I, AddrSpace);
diff --git a/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
--- a/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
+++ b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
@@ -29,14 +30,25 @@
 using namespace llvm;
 
 static cl::opt<unsigned>
-    ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(128),
-                     cl::desc("div and rem instructions on integers with <N> "
-                              "or more bits are expanded."));
+    ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
+                     cl::init(llvm::IntegerType::MAX_INT_BITS),
+                     cl::desc("div and rem instructions on integers with "
+                              "more than <N> bits are expanded."));
 
-static bool runImpl(Function &F) {
+static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
   SmallVector<BinaryOperator *, 4> Replace;
   bool Modified = false;
 
+  unsigned MaxLegalDivRemBitWidth = TTI.maxLegalDivRemBitWidth();
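+  // A non-default command-line value overrides the target's limit.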
+  if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS)
+    MaxLegalDivRemBitWidth = ExpandDivRemBits;
+
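+  // A limit of MAX_INT_BITS means every bit width is legal and there is
+  // nothing to expand.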
+  if (MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS)
+    return false;
+
   for (auto &I : instructions(F)) {
     switch (I.getOpcode()) {
     case Instruction::UDiv:
@@ -45,7 +54,7 @@
     case Instruction::SRem: {
       // TODO: This doesn't handle vectors.
       auto *IntTy = dyn_cast<IntegerType>(I.getType());
-      if (!IntTy || IntTy->getIntegerBitWidth() <= ExpandDivRemBits)
+      if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalDivRemBitWidth)
         continue;
 
       Replace.push_back(&cast<BinaryOperator>(I));
@@ -76,7 +85,8 @@
 
 PreservedAnalyses ExpandLargeDivRemPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {
-  bool Changed = runImpl(F);
+  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+  bool Changed = runImpl(F, TTI);
 
   if (Changed)
     return PreservedAnalyses::none();
@@ -92,9 +102,13 @@
     initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
-  bool runOnFunction(Function &F) override { return runImpl(F); }
+  bool runOnFunction(Function &F) override {
+    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    return runImpl(F, TTI);
+  }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addPreserved<AAResultsWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
   }
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1113,6 +1113,9 @@
 
   addPass(createPreISelIntrinsicLoweringPass());
   PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
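+  // Expand div/rem instructions on integer types wider than the target
+  // supports; instruction selection has no way to legalize them.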
+  addPass(createExpandLargeDivRemPass());
   addIRPasses();
   addCodeGenPrepare();
   addPassesToHandleExceptions();
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -317,6 +317,8 @@
 
   bool enableOrderedReductions() const { return true; }
 
+  unsigned maxLegalDivRemBitWidth() const { return 128; }
+
   InstructionCost getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -207,6 +207,8 @@
     return isLegalMaskedGather(Ty, Alignment);
   }
 
+  unsigned maxLegalDivRemBitWidth() const { return 128; }
+
   InstructionCost getMemcpyCost(const Instruction *I);
 
   int getNumMemOps(const IntrinsicInst *I) const;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -242,6 +242,7 @@
   bool isLegalMaskedExpandLoad(Type *DataType);
   bool isLegalMaskedCompressStore(Type *DataType);
   bool hasDivRemOp(Type *DataType, bool IsSigned);
+  unsigned maxLegalDivRemBitWidth() const;
   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5353,6 +5353,8 @@
   return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);
 }
 
+unsigned X86TTIImpl::maxLegalDivRemBitWidth() const { return 128; }
+
 bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
   return false;
 }
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -15,6 +15,7 @@
 ; CHECK-NEXT:   ModulePass Manager
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Expand large div/rem
 ; CHECK-NEXT:       Expand Atomic instructions
 ; CHECK-NEXT:       Module Verifier
 ; CHECK-NEXT:       Lower Garbage Collection Instructions
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -18,6 +18,7 @@
 ; CHECK-NEXT:   ModulePass Manager
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Expand large div/rem
 ; CHECK-NEXT:       Expand Atomic instructions
 ; CHECK-NEXT:     SVE intrinsics optimizations
 ; CHECK-NEXT:       FunctionPass Manager
diff --git a/llvm/test/CodeGen/AArch64/udivmodei5.ll b/llvm/test/CodeGen/AArch64/udivmodei5.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udivmodei5.ll
@@ -0,0 +1,2632 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnuabi < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-linux-gnuabi < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @udiv129(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: udiv129:
+; CHECK:       // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT:    ldp x10, x11, [x0]
+; CHECK-NEXT:    mov w8, #127
+; CHECK-NEXT:    ldrb w14, [x0, #16]
+; CHECK-NEXT:    clz x13, x10
+; CHECK-NEXT:    orr x9, x10, x14
+; CHECK-NEXT:    clz x12, x11
+; CHECK-NEXT:    orr x9, x9, x11
+; CHECK-NEXT:    add x13, x13, #64
+; CHECK-NEXT:    cmp x9, #0
+; CHECK-NEXT:    clz x15, x14
+; CHECK-NEXT:    cset w17, eq
+; CHECK-NEXT:    cmp x11, #0
+; CHECK-NEXT:    csel x12, x12, x13, ne
+; CHECK-NEXT:    add x9, x15, #64
+; CHECK-NEXT:    cmp x14, #0
+; CHECK-NEXT:    add x12, x12, #128
+; CHECK-NEXT:    csel x9, x9, x12, ne
+; CHECK-NEXT:    subs x9, x9, #127
+; CHECK-NEXT:    ngcs x13, xzr
+; CHECK-NEXT:    ngcs x15, xzr
+; CHECK-NEXT:    ngc x18, xzr
+; CHECK-NEXT:    subs x12, x8, x9
+; CHECK-NEXT:    ngcs x9, x13
+; CHECK-NEXT:    ngcs x16, x15
+; CHECK-NEXT:    ngc x8, x18
+; CHECK-NEXT:    cmp x16, #0
+; CHECK-NEXT:    cset w13, ne
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w15, ne
+; CHECK-NEXT:    orr x8, x16, x8
+; CHECK-NEXT:    csel w13, w13, w15, eq
+; CHECK-NEXT:    cmp x12, #128
+; CHECK-NEXT:    cset w15, hi
+; CHECK-NEXT:    cmp x9, #0
+; CHECK-NEXT:    cset w18, ne
+; CHECK-NEXT:    csel w15, w15, w18, eq
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    csel w8, w15, w13, eq
+; CHECK-NEXT:    orr w17, w17, w8
+; CHECK-NEXT:    cmp w17, #0
+; CHECK-NEXT:    csel x15, xzr, x14, ne
+; CHECK-NEXT:    csel x13, xzr, x11, ne
+; CHECK-NEXT:    csel x8, xzr, x10, ne
+; CHECK-NEXT:    tbnz w17, #0, .LBB0_7
+; CHECK-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-NEXT:    and x16, x16, #0x1
+; CHECK-NEXT:    eor x17, x12, #0x80
+; CHECK-NEXT:    orr x17, x17, x16
+; CHECK-NEXT:    orr x17, x17, x9
+; CHECK-NEXT:    cbz x17, .LBB0_7
+; CHECK-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-NEXT:    mvn w13, w12
+; CHECK-NEXT:    mov w17, #128
+; CHECK-NEXT:    adds x8, x12, #1
+; CHECK-NEXT:    lsl x18, x11, #1
+; CHECK-NEXT:    sub x17, x17, x12
+; CHECK-NEXT:    lsr x15, x10, x12
+; CHECK-NEXT:    adcs x9, x9, xzr
+; CHECK-NEXT:    lsl x13, x18, x13
+; CHECK-NEXT:    cinc x16, x16, hs
+; CHECK-NEXT:    orr x13, x13, x15
+; CHECK-NEXT:    lsr x15, x11, x12
+; CHECK-NEXT:    tst x12, #0x40
+; CHECK-NEXT:    csel x15, x15, x13, ne
+; CHECK-NEXT:    lsl x13, x14, x17
+; CHECK-NEXT:    tst x17, #0x40
+; CHECK-NEXT:    mvn w2, w17
+; CHECK-NEXT:    csel x0, xzr, x13, ne
+; CHECK-NEXT:    neg x12, x12
+; CHECK-NEXT:    orr w15, w0, w15
+; CHECK-NEXT:    lsr x0, x10, #1
+; CHECK-NEXT:    and x13, x16, #0x1
+; CHECK-NEXT:    lsl x16, x11, x17
+; CHECK-NEXT:    lsr x0, x0, x2
+; CHECK-NEXT:    lsl x2, x10, x12
+; CHECK-NEXT:    orr x16, x16, x0
+; CHECK-NEXT:    lsl x0, x10, x17
+; CHECK-NEXT:    csel x16, x0, x16, ne
+; CHECK-NEXT:    csel x0, xzr, x0, ne
+; CHECK-NEXT:    tst x12, #0x40
+; CHECK-NEXT:    csel x12, xzr, x2, ne
+; CHECK-NEXT:    cmp x17, #128
+; CHECK-NEXT:    csel x2, x15, x12, lo
+; CHECK-NEXT:    csel x12, x0, xzr, lo
+; CHECK-NEXT:    csel x15, x16, xzr, lo
+; CHECK-NEXT:    orr x16, x8, x13
+; CHECK-NEXT:    cmp x17, #0
+; CHECK-NEXT:    orr x16, x16, x9
+; CHECK-NEXT:    csel x0, x14, x2, eq
+; CHECK-NEXT:    cbz x16, .LBB0_5
+; CHECK-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-NEXT:    mvn w2, w8
+; CHECK-NEXT:    mov w5, #128
+; CHECK-NEXT:    sub x5, x5, x8
+; CHECK-NEXT:    lsr x3, x10, x8
+; CHECK-NEXT:    lsr x4, x11, x8
+; CHECK-NEXT:    tst x8, #0x40
+; CHECK-NEXT:    lsl x18, x18, x2
+; CHECK-NEXT:    csel x2, xzr, x4, ne
+; CHECK-NEXT:    orr x18, x18, x3
+; CHECK-NEXT:    lsl x3, x14, x5
+; CHECK-NEXT:    csel x18, x4, x18, ne
+; CHECK-NEXT:    tst x5, #0x40
+; CHECK-NEXT:    csel x4, xzr, x3, ne
+; CHECK-NEXT:    csel x3, x3, xzr, ne
+; CHECK-NEXT:    subs x5, x8, #128
+; CHECK-NEXT:    orr x2, x2, x3
+; CHECK-NEXT:    csel x6, x2, xzr, lo
+; CHECK-NEXT:    tst x5, #0x40
+; CHECK-NEXT:    orr x18, x18, x4
+; CHECK-NEXT:    mov x16, xzr
+; CHECK-NEXT:    lsr x14, x14, x5
+; CHECK-NEXT:    mov x17, xzr
+; CHECK-NEXT:    csel x14, xzr, x14, ne
+; CHECK-NEXT:    cmp x8, #128
+; CHECK-NEXT:    csel x14, x18, x14, lo
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    and x3, x0, #0x1
+; CHECK-NEXT:    csel x2, x10, x14, eq
+; CHECK-NEXT:    csel x0, x11, x6, eq
+; CHECK-NEXT:    mov w10, #2
+; CHECK-NEXT:    mov x14, #-1
+; CHECK-NEXT:    mov w18, #1
+; CHECK-NEXT:  .LBB0_4: // %udiv-do-while
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    extr x4, x0, x2, #63
+; CHECK-NEXT:    bfi x3, x2, #1, #63
+; CHECK-NEXT:    cmp x10, x3
+; CHECK-NEXT:    lsr x0, x0, #63
+; CHECK-NEXT:    mov x11, xzr
+; CHECK-NEXT:    ngcs xzr, x4
+; CHECK-NEXT:    ngc x0, x0
+; CHECK-NEXT:    sbfx x5, x0, #0, #1
+; CHECK-NEXT:    and x0, x5, #0x3
+; CHECK-NEXT:    subs x2, x3, x0
+; CHECK-NEXT:    extr x3, x15, x12, #63
+; CHECK-NEXT:    sbcs x0, x4, xzr
+; CHECK-NEXT:    lsr x15, x15, #63
+; CHECK-NEXT:    subs x8, x8, #1
+; CHECK-NEXT:    orr w15, w17, w15
+; CHECK-NEXT:    adcs x9, x9, x14
+; CHECK-NEXT:    and x4, x15, #0x1
+; CHECK-NEXT:    adc x13, x13, x18
+; CHECK-NEXT:    orr x15, x17, x3
+; CHECK-NEXT:    and x13, x13, #0x1
+; CHECK-NEXT:    orr x12, x16, x12, lsl #1
+; CHECK-NEXT:    orr x17, x8, x13
+; CHECK-NEXT:    and x16, x5, #0x1
+; CHECK-NEXT:    orr x5, x17, x9
+; CHECK-NEXT:    mov x3, x4
+; CHECK-NEXT:    mov x17, xzr
+; CHECK-NEXT:    cbnz x5, .LBB0_4
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_5:
+; CHECK-NEXT:    mov x11, xzr
+; CHECK-NEXT:  .LBB0_6: // %udiv-loop-exit
+; CHECK-NEXT:    extr x9, x15, x12, #63
+; CHECK-NEXT:    lsr x10, x15, #63
+; CHECK-NEXT:    orr w10, w11, w10
+; CHECK-NEXT:    orr x8, x16, x12, lsl #1
+; CHECK-NEXT:    and x15, x10, #0x1
+; CHECK-NEXT:    orr x13, x11, x9
+; CHECK-NEXT:  .LBB0_7: // %udiv-end
+; CHECK-NEXT:    and w9, w15, #0x1
+; CHECK-NEXT:    stp x8, x13, [x1]
+; CHECK-NEXT:    strb w9, [x1, #16]
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: udiv129:
+; CHECK-BE:       // %bb.0: // %_udiv-special-cases
+; CHECK-BE-NEXT:    ldp x10, x9, [x0]
+; CHECK-BE-NEXT:    mov w8, #127
+; CHECK-BE-NEXT:    ldrb w11, [x0, #16]
+; CHECK-BE-NEXT:    lsr x14, x10, #56
+; CHECK-BE-NEXT:    extr x13, x10, x9, #56
+; CHECK-BE-NEXT:    bfi x11, x9, #8, #56
+; CHECK-BE-NEXT:    clz x9, x11
+; CHECK-BE-NEXT:    add x9, x9, #64
+; CHECK-BE-NEXT:    clz x10, x13
+; CHECK-BE-NEXT:    cmp x13, #0
+; CHECK-BE-NEXT:    csel x9, x10, x9, ne
+; CHECK-BE-NEXT:    ands x16, x14, #0x1
+; CHECK-BE-NEXT:    orr x10, x11, x16
+; CHECK-BE-NEXT:    clz x12, x16
+; CHECK-BE-NEXT:    add x9, x9, #128
+; CHECK-BE-NEXT:    orr x10, x10, x13
+; CHECK-BE-NEXT:    add x12, x12, #64
+; CHECK-BE-NEXT:    csel x9, x12, x9, ne
+; CHECK-BE-NEXT:    cmp x10, #0
+; CHECK-BE-NEXT:    cset w12, eq
+; CHECK-BE-NEXT:    subs x9, x9, #127
+; CHECK-BE-NEXT:    ngcs x15, xzr
+; CHECK-BE-NEXT:    ngcs x17, xzr
+; CHECK-BE-NEXT:    ngc x18, xzr
+; CHECK-BE-NEXT:    subs x10, x8, x9
+; CHECK-BE-NEXT:    ngcs x9, x15
+; CHECK-BE-NEXT:    ngcs x15, x17
+; CHECK-BE-NEXT:    ngc x8, x18
+; CHECK-BE-NEXT:    cmp x15, #0
+; CHECK-BE-NEXT:    cset w17, ne
+; CHECK-BE-NEXT:    cmp x8, #0
+; CHECK-BE-NEXT:    cset w18, ne
+; CHECK-BE-NEXT:    orr x8, x15, x8
+; CHECK-BE-NEXT:    csel w17, w17, w18, eq
+; CHECK-BE-NEXT:    cmp x10, #128
+; CHECK-BE-NEXT:    cset w18, hi
+; CHECK-BE-NEXT:    cmp x9, #0
+; CHECK-BE-NEXT:    cset w0, ne
+; CHECK-BE-NEXT:    csel w18, w18, w0, eq
+; CHECK-BE-NEXT:    cmp x8, #0
+; CHECK-BE-NEXT:    csel w8, w18, w17, eq
+; CHECK-BE-NEXT:    orr w17, w12, w8
+; CHECK-BE-NEXT:    cmp w17, #0
+; CHECK-BE-NEXT:    csel x8, xzr, x11, ne
+; CHECK-BE-NEXT:    csel x12, xzr, x13, ne
+; CHECK-BE-NEXT:    csel x14, xzr, x14, ne
+; CHECK-BE-NEXT:    tbnz w17, #0, .LBB0_7
+; CHECK-BE-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-BE-NEXT:    and x15, x15, #0x1
+; CHECK-BE-NEXT:    eor x17, x10, #0x80
+; CHECK-BE-NEXT:    orr x17, x17, x15
+; CHECK-BE-NEXT:    orr x17, x17, x9
+; CHECK-BE-NEXT:    cbz x17, .LBB0_7
+; CHECK-BE-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-BE-NEXT:    mvn w12, w10
+; CHECK-BE-NEXT:    mov w17, #128
+; CHECK-BE-NEXT:    adds x8, x10, #1
+; CHECK-BE-NEXT:    lsl x18, x13, #1
+; CHECK-BE-NEXT:    sub x17, x17, x10
+; CHECK-BE-NEXT:    lsr x14, x11, x10
+; CHECK-BE-NEXT:    adcs x9, x9, xzr
+; CHECK-BE-NEXT:    lsl x12, x18, x12
+; CHECK-BE-NEXT:    cinc x15, x15, hs
+; CHECK-BE-NEXT:    orr x12, x12, x14
+; CHECK-BE-NEXT:    lsr x14, x13, x10
+; CHECK-BE-NEXT:    tst x10, #0x40
+; CHECK-BE-NEXT:    csel x14, x14, x12, ne
+; CHECK-BE-NEXT:    lsl x12, x16, x17
+; CHECK-BE-NEXT:    tst x17, #0x40
+; CHECK-BE-NEXT:    mvn w2, w17
+; CHECK-BE-NEXT:    csel x0, xzr, x12, ne
+; CHECK-BE-NEXT:    neg x10, x10
+; CHECK-BE-NEXT:    orr w14, w0, w14
+; CHECK-BE-NEXT:    lsr x0, x11, #1
+; CHECK-BE-NEXT:    and x12, x15, #0x1
+; CHECK-BE-NEXT:    lsl x15, x13, x17
+; CHECK-BE-NEXT:    lsr x0, x0, x2
+; CHECK-BE-NEXT:    lsl x2, x11, x10
+; CHECK-BE-NEXT:    orr x15, x15, x0
+; CHECK-BE-NEXT:    lsl x0, x11, x17
+; CHECK-BE-NEXT:    csel x15, x0, x15, ne
+; CHECK-BE-NEXT:    csel x0, xzr, x0, ne
+; CHECK-BE-NEXT:    tst x10, #0x40
+; CHECK-BE-NEXT:    csel x10, xzr, x2, ne
+; CHECK-BE-NEXT:    cmp x17, #128
+; CHECK-BE-NEXT:    csel x2, x14, x10, lo
+; CHECK-BE-NEXT:    csel x10, x0, xzr, lo
+; CHECK-BE-NEXT:    csel x14, x15, xzr, lo
+; CHECK-BE-NEXT:    orr x15, x8, x12
+; CHECK-BE-NEXT:    cmp x17, #0
+; CHECK-BE-NEXT:    orr x15, x15, x9
+; CHECK-BE-NEXT:    csel x0, x16, x2, eq
+; CHECK-BE-NEXT:    cbz x15, .LBB0_5
+; CHECK-BE-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-BE-NEXT:    mvn w2, w8
+; CHECK-BE-NEXT:    lsr x3, x11, x8
+; CHECK-BE-NEXT:    lsr x4, x13, x8
+; CHECK-BE-NEXT:    tst x8, #0x40
+; CHECK-BE-NEXT:    and x16, x16, #0x1
+; CHECK-BE-NEXT:    mov x17, xzr
+; CHECK-BE-NEXT:    lsl x18, x18, x2
+; CHECK-BE-NEXT:    mov w2, #128
+; CHECK-BE-NEXT:    orr x18, x18, x3
+; CHECK-BE-NEXT:    sub x5, x2, x8
+; CHECK-BE-NEXT:    csel x3, xzr, x4, ne
+; CHECK-BE-NEXT:    csel x18, x4, x18, ne
+; CHECK-BE-NEXT:    lsr x4, x16, x8
+; CHECK-BE-NEXT:    mov x15, xzr
+; CHECK-BE-NEXT:    csel x2, xzr, x4, ne
+; CHECK-BE-NEXT:    cmp x8, #128
+; CHECK-BE-NEXT:    csel x2, x2, xzr, lo
+; CHECK-BE-NEXT:    lsl x4, x16, x5
+; CHECK-BE-NEXT:    tst x5, #0x40
+; CHECK-BE-NEXT:    csel x5, xzr, x4, ne
+; CHECK-BE-NEXT:    csel x4, x4, xzr, ne
+; CHECK-BE-NEXT:    subs x6, x8, #128
+; CHECK-BE-NEXT:    orr x3, x3, x4
+; CHECK-BE-NEXT:    csel x3, x3, xzr, lo
+; CHECK-BE-NEXT:    tst x6, #0x40
+; CHECK-BE-NEXT:    orr x18, x18, x5
+; CHECK-BE-NEXT:    and x4, x0, #0x1
+; CHECK-BE-NEXT:    lsr x16, x16, x6
+; CHECK-BE-NEXT:    csel x16, xzr, x16, ne
+; CHECK-BE-NEXT:    cmp x8, #128
+; CHECK-BE-NEXT:    csel x16, x18, x16, lo
+; CHECK-BE-NEXT:    cmp x8, #0
+; CHECK-BE-NEXT:    csel x0, x11, x16, eq
+; CHECK-BE-NEXT:    csel x3, x13, x3, eq
+; CHECK-BE-NEXT:    mov w11, #2
+; CHECK-BE-NEXT:    mov x16, #-1
+; CHECK-BE-NEXT:    mov w18, #1
+; CHECK-BE-NEXT:  .LBB0_4: // %udiv-do-while
+; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT:    extr x5, x3, x0, #63
+; CHECK-BE-NEXT:    extr x2, x2, x3, #63
+; CHECK-BE-NEXT:    bfi x4, x0, #1, #63
+; CHECK-BE-NEXT:    lsr x7, x14, #63
+; CHECK-BE-NEXT:    cmp x11, x4
+; CHECK-BE-NEXT:    mov x13, xzr
+; CHECK-BE-NEXT:    ngcs xzr, x5
+; CHECK-BE-NEXT:    ngc x0, x2
+; CHECK-BE-NEXT:    sbfx x6, x0, #0, #1
+; CHECK-BE-NEXT:    and x0, x6, #0x3
+; CHECK-BE-NEXT:    subs x0, x4, x0
+; CHECK-BE-NEXT:    extr x4, x14, x10, #63
+; CHECK-BE-NEXT:    sbcs x3, x5, xzr
+; CHECK-BE-NEXT:    orr x10, x15, x10, lsl #1
+; CHECK-BE-NEXT:    sbc x2, x2, xzr
+; CHECK-BE-NEXT:    subs x8, x8, #1
+; CHECK-BE-NEXT:    adcs x9, x9, x16
+; CHECK-BE-NEXT:    orr x14, x17, x4
+; CHECK-BE-NEXT:    adc x12, x12, x18
+; CHECK-BE-NEXT:    orr w15, w17, w7
+; CHECK-BE-NEXT:    and x12, x12, #0x1
+; CHECK-BE-NEXT:    and x4, x15, #0x1
+; CHECK-BE-NEXT:    orr x17, x8, x12
+; CHECK-BE-NEXT:    and x15, x6, #0x1
+; CHECK-BE-NEXT:    and x2, x2, #0x1
+; CHECK-BE-NEXT:    orr x5, x17, x9
+; CHECK-BE-NEXT:    mov x17, xzr
+; CHECK-BE-NEXT:    cbnz x5, .LBB0_4
+; CHECK-BE-NEXT:    b .LBB0_6
+; CHECK-BE-NEXT:  .LBB0_5:
+; CHECK-BE-NEXT:    mov x13, xzr
+; CHECK-BE-NEXT:  .LBB0_6: // %udiv-loop-exit
+; CHECK-BE-NEXT:    extr x9, x14, x10, #63
+; CHECK-BE-NEXT:    lsr x11, x14, #63
+; CHECK-BE-NEXT:    orr x8, x15, x10, lsl #1
+; CHECK-BE-NEXT:    orr w10, w13, w11
+; CHECK-BE-NEXT:    and x14, x10, #0x1
+; CHECK-BE-NEXT:    orr x12, x13, x9
+; CHECK-BE-NEXT:  .LBB0_7: // %udiv-end
+; CHECK-BE-NEXT:    extr x9, x14, x12, #8
+; CHECK-BE-NEXT:    extr x10, x12, x8, #8
+; CHECK-BE-NEXT:    strb w8, [x1, #16]
+; CHECK-BE-NEXT:    and x9, x9, #0x1ffffffffffffff
+; CHECK-BE-NEXT:    stp x9, x10, [x1]
+; CHECK-BE-NEXT:    ret
+  %a = load i129, i129* %ptr
+  %res = udiv i129 %a, 3
+  store i129 %res, i129* %out
+  ret void
+}
+
+define i129 @urem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: urem129:
+; CHECK:       // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT:    clz x9, x4
+; CHECK-NEXT:    clz x8, x5
+; CHECK-NEXT:    add x9, x9, #64
+; CHECK-NEXT:    cmp x5, #0
+; CHECK-NEXT:    clz x10, x0
+; CHECK-NEXT:    csel x8, x8, x9, ne
+; CHECK-NEXT:    clz x9, x1
+; CHECK-NEXT:    add x10, x10, #64
+; CHECK-NEXT:    cmp x1, #0
+; CHECK-NEXT:    add x8, x8, #128
+; CHECK-NEXT:    csel x9, x9, x10, ne
+; CHECK-NEXT:    ands x10, x2, #0x1
+; CHECK-NEXT:    clz x11, x10
+; CHECK-NEXT:    add x9, x9, #128
+; CHECK-NEXT:    add x11, x11, #64
+; CHECK-NEXT:    orr x10, x0, x10
+; CHECK-NEXT:    csel x9, x11, x9, ne
+; CHECK-NEXT:    ands x11, x6, #0x1
+; CHECK-NEXT:    orr x12, x4, x11
+; CHECK-NEXT:    clz x11, x11
+; CHECK-NEXT:    orr x12, x12, x5
+; CHECK-NEXT:    add x11, x11, #64
+; CHECK-NEXT:    csel x8, x11, x8, ne
+; CHECK-NEXT:    cmp x12, #0
+; CHECK-NEXT:    orr x10, x10, x1
+; CHECK-NEXT:    ccmp x10, #0, #4, ne
+; CHECK-NEXT:    cset w11, eq
+; CHECK-NEXT:    subs x8, x8, #127
+; CHECK-NEXT:    ngcs x12, xzr
+; CHECK-NEXT:    ngcs x13, xzr
+; CHECK-NEXT:    ngc x14, xzr
+; CHECK-NEXT:    subs x9, x9, #127
+; CHECK-NEXT:    ngcs x15, xzr
+; CHECK-NEXT:    ngcs x16, xzr
+; CHECK-NEXT:    ngc x17, xzr
+; CHECK-NEXT:    subs x10, x8, x9
+; CHECK-NEXT:    sbcs x9, x12, x15
+; CHECK-NEXT:    sbcs x13, x13, x16
+; CHECK-NEXT:    sbc x8, x14, x17
+; CHECK-NEXT:    cmp x10, #128
+; CHECK-NEXT:    cset w12, hi
+; CHECK-NEXT:    cmp x9, #0
+; CHECK-NEXT:    cset w14, ne
+; CHECK-NEXT:    csel w12, w12, w14, eq
+; CHECK-NEXT:    cmp x13, #0
+; CHECK-NEXT:    cset w14, ne
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w15, ne
+; CHECK-NEXT:    orr x8, x13, x8
+; CHECK-NEXT:    csel w14, w14, w15, eq
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    csel w8, w12, w14, eq
+; CHECK-NEXT:    orr w14, w11, w8
+; CHECK-NEXT:    cmp w14, #0
+; CHECK-NEXT:    csel x12, xzr, x2, ne
+; CHECK-NEXT:    csel x11, xzr, x1, ne
+; CHECK-NEXT:    csel x8, xzr, x0, ne
+; CHECK-NEXT:    and x12, x12, #0x1
+; CHECK-NEXT:    tbnz w14, #0, .LBB1_7
+; CHECK-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-NEXT:    and x13, x13, #0x1
+; CHECK-NEXT:    eor x14, x10, #0x80
+; CHECK-NEXT:    orr x14, x14, x13
+; CHECK-NEXT:    orr x14, x14, x9
+; CHECK-NEXT:    cbz x14, .LBB1_7
+; CHECK-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-NEXT:    str x23, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-NEXT:    mvn w11, w10
+; CHECK-NEXT:    mov w14, #128
+; CHECK-NEXT:    adds x8, x10, #1
+; CHECK-NEXT:    lsl x15, x1, #1
+; CHECK-NEXT:    sub x14, x14, x10
+; CHECK-NEXT:    lsr x12, x0, x10
+; CHECK-NEXT:    adcs x9, x9, xzr
+; CHECK-NEXT:    lsl x11, x15, x11
+; CHECK-NEXT:    cinc x13, x13, hs
+; CHECK-NEXT:    orr x11, x11, x12
+; CHECK-NEXT:    lsr x12, x1, x10
+; CHECK-NEXT:    tst x10, #0x40
+; CHECK-NEXT:    csel x12, x12, x11, ne
+; CHECK-NEXT:    lsl x11, x2, x14
+; CHECK-NEXT:    tst x14, #0x40
+; CHECK-NEXT:    mvn w17, w14
+; CHECK-NEXT:    csel x16, xzr, x11, ne
+; CHECK-NEXT:    neg x10, x10
+; CHECK-NEXT:    orr w12, w16, w12
+; CHECK-NEXT:    lsr x16, x0, #1
+; CHECK-NEXT:    and x11, x13, #0x1
+; CHECK-NEXT:    lsl x13, x1, x14
+; CHECK-NEXT:    lsr x16, x16, x17
+; CHECK-NEXT:    lsl x17, x0, x10
+; CHECK-NEXT:    orr x13, x13, x16
+; CHECK-NEXT:    lsl x16, x0, x14
+; CHECK-NEXT:    csel x13, x16, x13, ne
+; CHECK-NEXT:    csel x16, xzr, x16, ne
+; CHECK-NEXT:    tst x10, #0x40
+; CHECK-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    csel x10, xzr, x17, ne
+; CHECK-NEXT:    cmp x14, #128
+; CHECK-NEXT:    csel x17, x12, x10, lo
+; CHECK-NEXT:    csel x10, x16, xzr, lo
+; CHECK-NEXT:    csel x12, x13, xzr, lo
+; CHECK-NEXT:    orr x13, x8, x11
+; CHECK-NEXT:    cmp x14, #0
+; CHECK-NEXT:    orr x13, x13, x9
+; CHECK-NEXT:    csel x3, x2, x17, eq
+; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    cbz x13, .LBB1_5
+; CHECK-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-NEXT:    mvn w16, w8
+; CHECK-NEXT:    mov w7, #128
+; CHECK-NEXT:    lsr x17, x0, x8
+; CHECK-NEXT:    lsr x18, x1, x8
+; CHECK-NEXT:    tst x8, #0x40
+; CHECK-NEXT:    mov x13, xzr
+; CHECK-NEXT:    lsl x15, x15, x16
+; CHECK-NEXT:    sub x16, x7, x8
+; CHECK-NEXT:    orr x15, x15, x17
+; CHECK-NEXT:    csel x17, xzr, x18, ne
+; CHECK-NEXT:    csel x15, x18, x15, ne
+; CHECK-NEXT:    and x18, x2, #0x1
+; CHECK-NEXT:    lsl x7, x18, x16
+; CHECK-NEXT:    tst x16, #0x40
+; CHECK-NEXT:    csel x16, xzr, x7, ne
+; CHECK-NEXT:    csel x7, x7, xzr, ne
+; CHECK-NEXT:    subs x19, x8, #128
+; CHECK-NEXT:    orr x17, x17, x7
+; CHECK-NEXT:    csel x17, x17, xzr, lo
+; CHECK-NEXT:    tst x19, #0x40
+; CHECK-NEXT:    orr x15, x15, x16
+; CHECK-NEXT:    mov x16, #-1
+; CHECK-NEXT:    lsr x18, x18, x19
+; CHECK-NEXT:    mov x14, xzr
+; CHECK-NEXT:    csel x18, xzr, x18, ne
+; CHECK-NEXT:    cmp x8, #128
+; CHECK-NEXT:    csel x15, x15, x18, lo
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    csel x20, x0, x15, eq
+; CHECK-NEXT:    csel x19, x1, x17, eq
+; CHECK-NEXT:    subs x15, x4, #1
+; CHECK-NEXT:    mov w18, #1
+; CHECK-NEXT:    adcs x17, x5, x16
+; CHECK-NEXT:    and x21, x3, #0x1
+; CHECK-NEXT:    adc x7, x6, x18
+; CHECK-NEXT:    and x7, x7, #0x1
+; CHECK-NEXT:  .LBB1_4: // %udiv-do-while
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    extr x22, x19, x20, #63
+; CHECK-NEXT:    bfi x21, x20, #1, #63
+; CHECK-NEXT:    cmp x15, x21
+; CHECK-NEXT:    lsr x19, x19, #63
+; CHECK-NEXT:    mov x3, xzr
+; CHECK-NEXT:    sbcs xzr, x17, x22
+; CHECK-NEXT:    sbc x19, x7, x19
+; CHECK-NEXT:    sbfx x23, x19, #0, #1
+; CHECK-NEXT:    and x19, x23, x4
+; CHECK-NEXT:    subs x20, x21, x19
+; CHECK-NEXT:    and x19, x23, x5
+; CHECK-NEXT:    sbcs x19, x22, x19
+; CHECK-NEXT:    extr x21, x12, x10, #63
+; CHECK-NEXT:    subs x8, x8, #1
+; CHECK-NEXT:    lsr x12, x12, #63
+; CHECK-NEXT:    adcs x9, x9, x16
+; CHECK-NEXT:    orr w12, w14, w12
+; CHECK-NEXT:    adc x11, x11, x18
+; CHECK-NEXT:    and x22, x12, #0x1
+; CHECK-NEXT:    and x11, x11, #0x1
+; CHECK-NEXT:    orr x12, x14, x21
+; CHECK-NEXT:    orr x14, x8, x11
+; CHECK-NEXT:    orr x10, x13, x10, lsl #1
+; CHECK-NEXT:    and x13, x23, #0x1
+; CHECK-NEXT:    orr x23, x14, x9
+; CHECK-NEXT:    mov x21, x22
+; CHECK-NEXT:    mov x14, xzr
+; CHECK-NEXT:    cbnz x23, .LBB1_4
+; CHECK-NEXT:    b .LBB1_6
+; CHECK-NEXT:  .LBB1_5:
+; CHECK-NEXT:    mov x3, xzr
+; CHECK-NEXT:  .LBB1_6: // %udiv-loop-exit
+; CHECK-NEXT:    extr x9, x12, x10, #63
+; CHECK-NEXT:    lsr x11, x12, #63
+; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    orr x8, x13, x10, lsl #1
+; CHECK-NEXT:    orr w10, w3, w11
+; CHECK-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    orr x11, x3, x9
+; CHECK-NEXT:    and x12, x10, #0x1
+; CHECK-NEXT:    ldr x23, [sp], #48 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB1_7: // %udiv-end
+; CHECK-NEXT:    umulh x10, x4, x8
+; CHECK-NEXT:    mul x13, x5, x8
+; CHECK-NEXT:    umulh x9, x5, x8
+; CHECK-NEXT:    mul x14, x4, x11
+; CHECK-NEXT:    adds x10, x13, x10
+; CHECK-NEXT:    umulh x13, x4, x11
+; CHECK-NEXT:    cinc x9, x9, hs
+; CHECK-NEXT:    adds x10, x14, x10
+; CHECK-NEXT:    adc x9, x9, x13
+; CHECK-NEXT:    madd x9, x5, x11, x9
+; CHECK-NEXT:    mul x11, x8, x6
+; CHECK-NEXT:    mul x8, x4, x8
+; CHECK-NEXT:    madd x11, x12, x4, x11
+; CHECK-NEXT:    subs x0, x0, x8
+; CHECK-NEXT:    add x8, x9, x11
+; CHECK-NEXT:    sbcs x1, x1, x10
+; CHECK-NEXT:    sbc x8, x2, x8
+; CHECK-NEXT:    and x2, x8, #0x1
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: urem129:
+; CHECK-BE:       // %bb.0: // %_udiv-special-cases
+; CHECK-BE-NEXT:    clz x9, x6
+; CHECK-BE-NEXT:    clz x8, x5
+; CHECK-BE-NEXT:    add x9, x9, #64
+; CHECK-BE-NEXT:    cmp x5, #0
+; CHECK-BE-NEXT:    clz x10, x2
+; CHECK-BE-NEXT:    csel x8, x8, x9, ne
+; CHECK-BE-NEXT:    clz x9, x1
+; CHECK-BE-NEXT:    add x10, x10, #64
+; CHECK-BE-NEXT:    cmp x1, #0
+; CHECK-BE-NEXT:    add x8, x8, #128
+; CHECK-BE-NEXT:    csel x9, x9, x10, ne
+; CHECK-BE-NEXT:    ands x10, x0, #0x1
+; CHECK-BE-NEXT:    clz x11, x10
+; CHECK-BE-NEXT:    add x9, x9, #128
+; CHECK-BE-NEXT:    add x11, x11, #64
+; CHECK-BE-NEXT:    orr x10, x2, x10
+; CHECK-BE-NEXT:    csel x9, x11, x9, ne
+; CHECK-BE-NEXT:    ands x11, x4, #0x1
+; CHECK-BE-NEXT:    orr x12, x6, x11
+; CHECK-BE-NEXT:    clz x11, x11
+; CHECK-BE-NEXT:    orr x12, x12, x5
+; CHECK-BE-NEXT:    add x11, x11, #64
+; CHECK-BE-NEXT:    csel x8, x11, x8, ne
+; CHECK-BE-NEXT:    cmp x12, #0
+; CHECK-BE-NEXT:    orr x10, x10, x1
+; CHECK-BE-NEXT:    ccmp x10, #0, #4, ne
+; CHECK-BE-NEXT:    cset w11, eq
+; CHECK-BE-NEXT:    subs x8, x8, #127
+; CHECK-BE-NEXT:    ngcs x12, xzr
+; CHECK-BE-NEXT:    ngcs x13, xzr
+; CHECK-BE-NEXT:    ngc x14, xzr
+; CHECK-BE-NEXT:    subs x9, x9, #127
+; CHECK-BE-NEXT:    ngcs x15, xzr
+; CHECK-BE-NEXT:    ngcs x16, xzr
+; CHECK-BE-NEXT:    ngc x17, xzr
+; CHECK-BE-NEXT:    subs x10, x8, x9
+; CHECK-BE-NEXT:    sbcs x9, x12, x15
+; CHECK-BE-NEXT:    sbcs x13, x13, x16
+; CHECK-BE-NEXT:    sbc x8, x14, x17
+; CHECK-BE-NEXT:    cmp x10, #128
+; CHECK-BE-NEXT:    cset w12, hi
+; CHECK-BE-NEXT:    cmp x9, #0
+; CHECK-BE-NEXT:    cset w14, ne
+; CHECK-BE-NEXT:    csel w12, w12, w14, eq
+; CHECK-BE-NEXT:    cmp x13, #0
+; CHECK-BE-NEXT:    cset w14, ne
+; CHECK-BE-NEXT:    cmp x8, #0
+; CHECK-BE-NEXT:    cset w15, ne
+; CHECK-BE-NEXT:    orr x8, x13, x8
+; CHECK-BE-NEXT:    csel w14, w14, w15, eq
+; CHECK-BE-NEXT:    cmp x8, #0
+; CHECK-BE-NEXT:    csel w8, w12, w14, eq
+; CHECK-BE-NEXT:    orr w14, w11, w8
+; CHECK-BE-NEXT:    cmp w14, #0
+; CHECK-BE-NEXT:    csel x11, xzr, x0, ne
+; CHECK-BE-NEXT:    csel x8, xzr, x2, ne
+; CHECK-BE-NEXT:    csel x12, xzr, x1, ne
+; CHECK-BE-NEXT:    and x11, x11, #0x1
+; CHECK-BE-NEXT:    tbnz w14, #0, .LBB1_7
+; CHECK-BE-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-BE-NEXT:    and x13, x13, #0x1
+; CHECK-BE-NEXT:    eor x14, x10, #0x80
+; CHECK-BE-NEXT:    orr x14, x14, x13
+; CHECK-BE-NEXT:    orr x14, x14, x9
+; CHECK-BE-NEXT:    cbz x14, .LBB1_7
+; CHECK-BE-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-BE-NEXT:    stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-BE-NEXT:    mvn w11, w10
+; CHECK-BE-NEXT:    mov w14, #128
+; CHECK-BE-NEXT:    adds x8, x10, #1
+; CHECK-BE-NEXT:    lsl x15, x1, #1
+; CHECK-BE-NEXT:    sub x14, x14, x10
+; CHECK-BE-NEXT:    lsr x12, x2, x10
+; CHECK-BE-NEXT:    adcs x9, x9, xzr
+; CHECK-BE-NEXT:    lsl x11, x15, x11
+; CHECK-BE-NEXT:    cinc x13, x13, hs
+; CHECK-BE-NEXT:    orr x11, x11, x12
+; CHECK-BE-NEXT:    lsr x12, x1, x10
+; CHECK-BE-NEXT:    tst x10, #0x40
+; CHECK-BE-NEXT:    csel x12, x12, x11, ne
+; CHECK-BE-NEXT:    lsl x11, x0, x14
+; CHECK-BE-NEXT:    tst x14, #0x40
+; CHECK-BE-NEXT:    mvn w17, w14
+; CHECK-BE-NEXT:    csel x16, xzr, x11, ne
+; CHECK-BE-NEXT:    neg x10, x10
+; CHECK-BE-NEXT:    orr w12, w16, w12
+; CHECK-BE-NEXT:    lsr x16, x2, #1
+; CHECK-BE-NEXT:    and x11, x13, #0x1
+; CHECK-BE-NEXT:    lsl x13, x1, x14
+; CHECK-BE-NEXT:    lsr x16, x16, x17
+; CHECK-BE-NEXT:    lsl x17, x2, x10
+; CHECK-BE-NEXT:    orr x13, x13, x16
+; CHECK-BE-NEXT:    lsl x16, x2, x14
+; CHECK-BE-NEXT:    csel x13, x16, x13, ne
+; CHECK-BE-NEXT:    csel x16, xzr, x16, ne
+; CHECK-BE-NEXT:    tst x10, #0x40
+; CHECK-BE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    csel x10, xzr, x17, ne
+; CHECK-BE-NEXT:    cmp x14, #128
+; CHECK-BE-NEXT:    csel x17, x12, x10, lo
+; CHECK-BE-NEXT:    csel x10, x16, xzr, lo
+; CHECK-BE-NEXT:    csel x12, x13, xzr, lo
+; CHECK-BE-NEXT:    orr x13, x8, x11
+; CHECK-BE-NEXT:    cmp x14, #0
+; CHECK-BE-NEXT:    orr x13, x13, x9
+; CHECK-BE-NEXT:    csel x3, x0, x17, eq
+; CHECK-BE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    cbz x13, .LBB1_5
+; CHECK-BE-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-BE-NEXT:    mvn w16, w8
+; CHECK-BE-NEXT:    lsr x17, x2, x8
+; CHECK-BE-NEXT:    lsr x18, x1, x8
+; CHECK-BE-NEXT:    tst x8, #0x40
+; CHECK-BE-NEXT:    and x7, x0, #0x1
+; CHECK-BE-NEXT:    mov x14, xzr
+; CHECK-BE-NEXT:    lsl x15, x15, x16
+; CHECK-BE-NEXT:    mov w16, #128
+; CHECK-BE-NEXT:    orr x15, x15, x17
+; CHECK-BE-NEXT:    sub x16, x16, x8
+; CHECK-BE-NEXT:    csel x17, xzr, x18, ne
+; CHECK-BE-NEXT:    csel x15, x18, x15, ne
+; CHECK-BE-NEXT:    lsr x18, x7, x8
+; CHECK-BE-NEXT:    mov x13, xzr
+; CHECK-BE-NEXT:    csel x18, xzr, x18, ne
+; CHECK-BE-NEXT:    cmp x8, #128
+; CHECK-BE-NEXT:    csel x21, x18, xzr, lo
+; CHECK-BE-NEXT:    lsl x18, x7, x16
+; CHECK-BE-NEXT:    tst x16, #0x40
+; CHECK-BE-NEXT:    and x22, x3, #0x1
+; CHECK-BE-NEXT:    csel x16, xzr, x18, ne
+; CHECK-BE-NEXT:    csel x18, x18, xzr, ne
+; CHECK-BE-NEXT:    subs x19, x8, #128
+; CHECK-BE-NEXT:    orr x17, x17, x18
+; CHECK-BE-NEXT:    csel x17, x17, xzr, lo
+; CHECK-BE-NEXT:    tst x19, #0x40
+; CHECK-BE-NEXT:    orr x15, x15, x16
+; CHECK-BE-NEXT:    mov x16, #-1
+; CHECK-BE-NEXT:    lsr x18, x7, x19
+; CHECK-BE-NEXT:    csel x18, xzr, x18, ne
+; CHECK-BE-NEXT:    cmp x8, #128
+; CHECK-BE-NEXT:    csel x15, x15, x18, lo
+; CHECK-BE-NEXT:    cmp x8, #0
+; CHECK-BE-NEXT:    csel x19, x2, x15, eq
+; CHECK-BE-NEXT:    csel x20, x1, x17, eq
+; CHECK-BE-NEXT:    subs x15, x6, #1
+; CHECK-BE-NEXT:    mov w18, #1
+; CHECK-BE-NEXT:    adcs x17, x5, x16
+; CHECK-BE-NEXT:    adc x7, x4, x18
+; CHECK-BE-NEXT:    and x7, x7, #0x1
+; CHECK-BE-NEXT:  .LBB1_4: // %udiv-do-while
+; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT:    extr x23, x20, x19, #63
+; CHECK-BE-NEXT:    extr x21, x21, x20, #63
+; CHECK-BE-NEXT:    bfi x22, x19, #1, #63
+; CHECK-BE-NEXT:    extr x24, x12, x10, #63
+; CHECK-BE-NEXT:    cmp x15, x22
+; CHECK-BE-NEXT:    lsr x25, x12, #63
+; CHECK-BE-NEXT:    sbcs xzr, x17, x23
+; CHECK-BE-NEXT:    orr x10, x13, x10, lsl #1
+; CHECK-BE-NEXT:    sbc x19, x7, x21
+; CHECK-BE-NEXT:    orr w13, w14, w25
+; CHECK-BE-NEXT:    sbfx x26, x19, #0, #1
+; CHECK-BE-NEXT:    mov x3, xzr
+; CHECK-BE-NEXT:    and x12, x26, x6
+; CHECK-BE-NEXT:    subs x19, x22, x12
+; CHECK-BE-NEXT:    and x12, x26, x5
+; CHECK-BE-NEXT:    sbcs x20, x23, x12
+; CHECK-BE-NEXT:    and x12, x26, x4
+; CHECK-BE-NEXT:    sbc x21, x21, x12
+; CHECK-BE-NEXT:    subs x8, x8, #1
+; CHECK-BE-NEXT:    adcs x9, x9, x16
+; CHECK-BE-NEXT:    orr x12, x14, x24
+; CHECK-BE-NEXT:    adc x11, x11, x18
+; CHECK-BE-NEXT:    and x22, x13, #0x1
+; CHECK-BE-NEXT:    and x11, x11, #0x1
+; CHECK-BE-NEXT:    and x13, x26, #0x1
+; CHECK-BE-NEXT:    orr x14, x8, x11
+; CHECK-BE-NEXT:    and x21, x21, #0x1
+; CHECK-BE-NEXT:    orr x23, x14, x9
+; CHECK-BE-NEXT:    mov x14, xzr
+; CHECK-BE-NEXT:    cbnz x23, .LBB1_4
+; CHECK-BE-NEXT:    b .LBB1_6
+; CHECK-BE-NEXT:  .LBB1_5:
+; CHECK-BE-NEXT:    mov x3, xzr
+; CHECK-BE-NEXT:  .LBB1_6: // %udiv-loop-exit
+; CHECK-BE-NEXT:    extr x9, x12, x10, #63
+; CHECK-BE-NEXT:    lsr x11, x12, #63
+; CHECK-BE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    orr x8, x13, x10, lsl #1
+; CHECK-BE-NEXT:    orr w10, w3, w11
+; CHECK-BE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    orr x12, x3, x9
+; CHECK-BE-NEXT:    and x11, x10, #0x1
+; CHECK-BE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    ldp x26, x25, [sp], #64 // 16-byte Folded Reload
+; CHECK-BE-NEXT:  .LBB1_7: // %udiv-end
+; CHECK-BE-NEXT:    umulh x10, x6, x8
+; CHECK-BE-NEXT:    mul x13, x5, x8
+; CHECK-BE-NEXT:    umulh x9, x5, x8
+; CHECK-BE-NEXT:    mul x14, x6, x12
+; CHECK-BE-NEXT:    adds x10, x13, x10
+; CHECK-BE-NEXT:    umulh x13, x6, x12
+; CHECK-BE-NEXT:    cinc x9, x9, hs
+; CHECK-BE-NEXT:    adds x10, x14, x10
+; CHECK-BE-NEXT:    adc x9, x9, x13
+; CHECK-BE-NEXT:    madd x9, x5, x12, x9
+; CHECK-BE-NEXT:    mul x12, x8, x4
+; CHECK-BE-NEXT:    mul x8, x6, x8
+; CHECK-BE-NEXT:    madd x11, x11, x6, x12
+; CHECK-BE-NEXT:    subs x2, x2, x8
+; CHECK-BE-NEXT:    add x8, x9, x11
+; CHECK-BE-NEXT:    sbcs x1, x1, x10
+; CHECK-BE-NEXT:    sbc x8, x0, x8
+; CHECK-BE-NEXT:    and x0, x8, #0x1
+; CHECK-BE-NEXT:    ret
+  %res = urem i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @sdiv129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: sdiv129:
+; CHECK:       // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT:    str x21, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    sbfx x10, x2, #0, #1
+; CHECK-NEXT:    sbfx x11, x6, #0, #1
+; CHECK-NEXT:    eor x8, x10, x0
+; CHECK-NEXT:    eor x9, x10, x1
+; CHECK-NEXT:    subs x13, x8, x10
+; CHECK-NEXT:    eor x8, x10, x2
+; CHECK-NEXT:    sbcs x14, x9, x10
+; CHECK-NEXT:    eor x9, x11, x5
+; CHECK-NEXT:    sbc x12, x8, x10
+; CHECK-NEXT:    eor x8, x11, x4
+; CHECK-NEXT:    subs x8, x8, x11
+; CHECK-NEXT:    eor x15, x11, x6
+; CHECK-NEXT:    sbcs x9, x9, x11
+; CHECK-NEXT:    clz x17, x8
+; CHECK-NEXT:    sbc x15, x15, x11
+; CHECK-NEXT:    clz x16, x9
+; CHECK-NEXT:    add x17, x17, #64
+; CHECK-NEXT:    cmp x9, #0
+; CHECK-NEXT:    clz x18, x13
+; CHECK-NEXT:    csel x17, x16, x17, ne
+; CHECK-NEXT:    clz x16, x14
+; CHECK-NEXT:    add x18, x18, #64
+; CHECK-NEXT:    cmp x14, #0
+; CHECK-NEXT:    add x17, x17, #128
+; CHECK-NEXT:    csel x16, x16, x18, ne
+; CHECK-NEXT:    ands x0, x12, #0x1
+; CHECK-NEXT:    clz x18, x0
+; CHECK-NEXT:    add x16, x16, #128
+; CHECK-NEXT:    add x18, x18, #64
+; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    csel x18, x18, x16, ne
+; CHECK-NEXT:    ands x16, x15, #0x1
+; CHECK-NEXT:    clz x1, x16
+; CHECK-NEXT:    orr x15, x8, x16
+; CHECK-NEXT:    add x1, x1, #64
+; CHECK-NEXT:    orr x15, x15, x9
+; CHECK-NEXT:    csel x17, x1, x17, ne
+; CHECK-NEXT:    orr x1, x13, x0
+; CHECK-NEXT:    cmp x15, #0
+; CHECK-NEXT:    orr x15, x1, x14
+; CHECK-NEXT:    ccmp x15, #0, #4, ne
+; CHECK-NEXT:    eor x10, x11, x10
+; CHECK-NEXT:    cset w1, eq
+; CHECK-NEXT:    subs x15, x17, #127
+; CHECK-NEXT:    ngcs x3, xzr
+; CHECK-NEXT:    ngcs x4, xzr
+; CHECK-NEXT:    ngc x5, xzr
+; CHECK-NEXT:    subs x17, x18, #127
+; CHECK-NEXT:    ngcs x18, xzr
+; CHECK-NEXT:    ngcs x7, xzr
+; CHECK-NEXT:    ngc x19, xzr
+; CHECK-NEXT:    subs x17, x15, x17
+; CHECK-NEXT:    sbcs x15, x3, x18
+; CHECK-NEXT:    sbcs x3, x4, x7
+; CHECK-NEXT:    sbc x18, x5, x19
+; CHECK-NEXT:    cmp x17, #128
+; CHECK-NEXT:    cset w4, hi
+; CHECK-NEXT:    cmp x15, #0
+; CHECK-NEXT:    cset w5, ne
+; CHECK-NEXT:    csel w4, w4, w5, eq
+; CHECK-NEXT:    cmp x3, #0
+; CHECK-NEXT:    cset w5, ne
+; CHECK-NEXT:    cmp x18, #0
+; CHECK-NEXT:    cset w7, ne
+; CHECK-NEXT:    orr x18, x3, x18
+; CHECK-NEXT:    csel w5, w5, w7, eq
+; CHECK-NEXT:    cmp x18, #0
+; CHECK-NEXT:    csel w11, w4, w5, eq
+; CHECK-NEXT:    eor w18, w6, w2
+; CHECK-NEXT:    orr w2, w1, w11
+; CHECK-NEXT:    and x11, x18, #0x1
+; CHECK-NEXT:    cmp w2, #0
+; CHECK-NEXT:    csel x1, xzr, x12, ne
+; CHECK-NEXT:    csel x18, xzr, x14, ne
+; CHECK-NEXT:    csel x12, xzr, x13, ne
+; CHECK-NEXT:    and x1, x1, #0x1
+; CHECK-NEXT:    tbnz w2, #0, .LBB2_7
+; CHECK-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-NEXT:    and x2, x3, #0x1
+; CHECK-NEXT:    eor x3, x17, #0x80
+; CHECK-NEXT:    orr x3, x3, x2
+; CHECK-NEXT:    orr x3, x3, x15
+; CHECK-NEXT:    cbz x3, .LBB2_7
+; CHECK-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-NEXT:    mvn w18, w17
+; CHECK-NEXT:    mov w3, #128
+; CHECK-NEXT:    adds x12, x17, #1
+; CHECK-NEXT:    lsl x4, x14, #1
+; CHECK-NEXT:    sub x3, x3, x17
+; CHECK-NEXT:    lsr x1, x13, x17
+; CHECK-NEXT:    adcs x15, x15, xzr
+; CHECK-NEXT:    lsl x18, x4, x18
+; CHECK-NEXT:    cinc x2, x2, hs
+; CHECK-NEXT:    orr x18, x18, x1
+; CHECK-NEXT:    lsr x1, x14, x17
+; CHECK-NEXT:    tst x17, #0x40
+; CHECK-NEXT:    csel x1, x1, x18, ne
+; CHECK-NEXT:    lsl x18, x0, x3
+; CHECK-NEXT:    tst x3, #0x40
+; CHECK-NEXT:    mvn w6, w3
+; CHECK-NEXT:    csel x5, xzr, x18, ne
+; CHECK-NEXT:    neg x17, x17
+; CHECK-NEXT:    orr w1, w5, w1
+; CHECK-NEXT:    lsr x5, x13, #1
+; CHECK-NEXT:    and x18, x2, #0x1
+; CHECK-NEXT:    lsl x2, x14, x3
+; CHECK-NEXT:    lsr x5, x5, x6
+; CHECK-NEXT:    lsl x6, x13, x17
+; CHECK-NEXT:    orr x2, x2, x5
+; CHECK-NEXT:    lsl x5, x13, x3
+; CHECK-NEXT:    csel x2, x5, x2, ne
+; CHECK-NEXT:    csel x5, xzr, x5, ne
+; CHECK-NEXT:    tst x17, #0x40
+; CHECK-NEXT:    csel x17, xzr, x6, ne
+; CHECK-NEXT:    cmp x3, #128
+; CHECK-NEXT:    csel x6, x1, x17, lo
+; CHECK-NEXT:    csel x17, x5, xzr, lo
+; CHECK-NEXT:    csel x1, x2, xzr, lo
+; CHECK-NEXT:    orr x2, x12, x18
+; CHECK-NEXT:    cmp x3, #0
+; CHECK-NEXT:    orr x2, x2, x15
+; CHECK-NEXT:    csel x5, x0, x6, eq
+; CHECK-NEXT:    cbz x2, .LBB2_5
+; CHECK-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-NEXT:    mvn w6, w12
+; CHECK-NEXT:    mov w20, #128
+; CHECK-NEXT:    lsr x7, x13, x12
+; CHECK-NEXT:    lsr x19, x14, x12
+; CHECK-NEXT:    tst x12, #0x40
+; CHECK-NEXT:    and x0, x0, #0x1
+; CHECK-NEXT:    lsl x4, x4, x6
+; CHECK-NEXT:    sub x6, x20, x12
+; CHECK-NEXT:    orr x4, x4, x7
+; CHECK-NEXT:    csel x7, xzr, x19, ne
+; CHECK-NEXT:    csel x4, x19, x4, ne
+; CHECK-NEXT:    tst x6, #0x40
+; CHECK-NEXT:    lsl x19, x0, x6
+; CHECK-NEXT:    mov x2, xzr
+; CHECK-NEXT:    csel x6, xzr, x19, ne
+; CHECK-NEXT:    csel x19, x19, xzr, ne
+; CHECK-NEXT:    subs x20, x12, #128
+; CHECK-NEXT:    orr x7, x7, x19
+; CHECK-NEXT:    csel x19, x7, xzr, lo
+; CHECK-NEXT:    tst x20, #0x40
+; CHECK-NEXT:    orr x4, x4, x6
+; CHECK-NEXT:    mov x3, xzr
+; CHECK-NEXT:    lsr x0, x0, x20
+; CHECK-NEXT:    csel x0, xzr, x0, ne
+; CHECK-NEXT:    cmp x12, #128
+; CHECK-NEXT:    csel x0, x4, x0, lo
+; CHECK-NEXT:    cmp x12, #0
+; CHECK-NEXT:    csel x7, x13, x0, eq
+; CHECK-NEXT:    csel x6, x14, x19, eq
+; CHECK-NEXT:    subs x13, x8, #1
+; CHECK-NEXT:    mov x14, #-1
+; CHECK-NEXT:    adcs x0, x9, x14
+; CHECK-NEXT:    mov w4, #1
+; CHECK-NEXT:    adc x16, x16, x4
+; CHECK-NEXT:    and x19, x5, #0x1
+; CHECK-NEXT:    and x5, x16, #0x1
+; CHECK-NEXT:  .LBB2_4: // %udiv-do-while
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    extr x20, x6, x7, #63
+; CHECK-NEXT:    bfi x19, x7, #1, #63
+; CHECK-NEXT:    cmp x13, x19
+; CHECK-NEXT:    lsr x6, x6, #63
+; CHECK-NEXT:    mov x16, xzr
+; CHECK-NEXT:    sbcs xzr, x0, x20
+; CHECK-NEXT:    sbc x6, x5, x6
+; CHECK-NEXT:    sbfx x21, x6, #0, #1
+; CHECK-NEXT:    and x6, x21, x8
+; CHECK-NEXT:    subs x7, x19, x6
+; CHECK-NEXT:    and x6, x21, x9
+; CHECK-NEXT:    sbcs x6, x20, x6
+; CHECK-NEXT:    extr x19, x1, x17, #63
+; CHECK-NEXT:    subs x12, x12, #1
+; CHECK-NEXT:    lsr x1, x1, #63
+; CHECK-NEXT:    adcs x15, x15, x14
+; CHECK-NEXT:    orr w1, w3, w1
+; CHECK-NEXT:    adc x18, x18, x4
+; CHECK-NEXT:    and x20, x1, #0x1
+; CHECK-NEXT:    and x18, x18, #0x1
+; CHECK-NEXT:    orr x1, x3, x19
+; CHECK-NEXT:    orr x3, x12, x18
+; CHECK-NEXT:    orr x17, x2, x17, lsl #1
+; CHECK-NEXT:    and x2, x21, #0x1
+; CHECK-NEXT:    orr x21, x3, x15
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    mov x3, xzr
+; CHECK-NEXT:    cbnz x21, .LBB2_4
+; CHECK-NEXT:    b .LBB2_6
+; CHECK-NEXT:  .LBB2_5:
+; CHECK-NEXT:    mov x16, xzr
+; CHECK-NEXT:  .LBB2_6: // %udiv-loop-exit
+; CHECK-NEXT:    extr x8, x1, x17, #63
+; CHECK-NEXT:    lsr x9, x1, #63
+; CHECK-NEXT:    orr w9, w16, w9
+; CHECK-NEXT:    orr x12, x2, x17, lsl #1
+; CHECK-NEXT:    and x1, x9, #0x1
+; CHECK-NEXT:    orr x18, x16, x8
+; CHECK-NEXT:  .LBB2_7: // %udiv-end
+; CHECK-NEXT:    eor x8, x12, x10
+; CHECK-NEXT:    eor x9, x18, x10
+; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    subs x0, x8, x10
+; CHECK-NEXT:    eor x8, x1, x11
+; CHECK-NEXT:    sbcs x1, x9, x10
+; CHECK-NEXT:    sbc x8, x8, x11
+; CHECK-NEXT:    and x2, x8, #0x1
+; CHECK-NEXT:    ldr x21, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: sdiv129:
+; CHECK-BE:       // %bb.0: // %_udiv-special-cases
+; CHECK-BE-NEXT:    str x25, [sp, #-64]! // 8-byte Folded Spill
+; CHECK-BE-NEXT:    sbfx x11, x0, #0, #1
+; CHECK-BE-NEXT:    sbfx x12, x4, #0, #1
+; CHECK-BE-NEXT:    eor x8, x11, x2
+; CHECK-BE-NEXT:    eor x9, x11, x1
+; CHECK-BE-NEXT:    subs x14, x8, x11
+; CHECK-BE-NEXT:    eor x8, x11, x0
+; CHECK-BE-NEXT:    sbcs x15, x9, x11
+; CHECK-BE-NEXT:    eor x9, x12, x5
+; CHECK-BE-NEXT:    sbc x13, x8, x11
+; CHECK-BE-NEXT:    eor x8, x12, x6
+; CHECK-BE-NEXT:    subs x8, x8, x12
+; CHECK-BE-NEXT:    eor x10, x12, x4
+; CHECK-BE-NEXT:    sbcs x9, x9, x12
+; CHECK-BE-NEXT:    clz x17, x8
+; CHECK-BE-NEXT:    sbc x10, x10, x12
+; CHECK-BE-NEXT:    clz x16, x9
+; CHECK-BE-NEXT:    add x17, x17, #64
+; CHECK-BE-NEXT:    cmp x9, #0
+; CHECK-BE-NEXT:    clz x18, x14
+; CHECK-BE-NEXT:    csel x16, x16, x17, ne
+; CHECK-BE-NEXT:    clz x17, x15
+; CHECK-BE-NEXT:    add x18, x18, #64
+; CHECK-BE-NEXT:    cmp x15, #0
+; CHECK-BE-NEXT:    add x16, x16, #128
+; CHECK-BE-NEXT:    csel x17, x17, x18, ne
+; CHECK-BE-NEXT:    ands x1, x13, #0x1
+; CHECK-BE-NEXT:    clz x18, x1
+; CHECK-BE-NEXT:    add x17, x17, #128
+; CHECK-BE-NEXT:    add x18, x18, #64
+; CHECK-BE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    csel x17, x18, x17, ne
+; CHECK-BE-NEXT:    ands x10, x10, #0x1
+; CHECK-BE-NEXT:    clz x2, x10
+; CHECK-BE-NEXT:    orr x18, x8, x10
+; CHECK-BE-NEXT:    add x2, x2, #64
+; CHECK-BE-NEXT:    orr x18, x18, x9
+; CHECK-BE-NEXT:    csel x16, x2, x16, ne
+; CHECK-BE-NEXT:    orr x2, x14, x1
+; CHECK-BE-NEXT:    cmp x18, #0
+; CHECK-BE-NEXT:    orr x18, x2, x15
+; CHECK-BE-NEXT:    ccmp x18, #0, #4, ne
+; CHECK-BE-NEXT:    eor x11, x12, x11
+; CHECK-BE-NEXT:    eor w0, w4, w0
+; CHECK-BE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    cset w18, eq
+; CHECK-BE-NEXT:    subs x16, x16, #127
+; CHECK-BE-NEXT:    ngcs x2, xzr
+; CHECK-BE-NEXT:    ngcs x3, xzr
+; CHECK-BE-NEXT:    ngc x5, xzr
+; CHECK-BE-NEXT:    subs x17, x17, #127
+; CHECK-BE-NEXT:    ngcs x6, xzr
+; CHECK-BE-NEXT:    ngcs x7, xzr
+; CHECK-BE-NEXT:    ngc x19, xzr
+; CHECK-BE-NEXT:    subs x17, x16, x17
+; CHECK-BE-NEXT:    sbcs x16, x2, x6
+; CHECK-BE-NEXT:    sbcs x2, x3, x7
+; CHECK-BE-NEXT:    sbc x3, x5, x19
+; CHECK-BE-NEXT:    cmp x17, #128
+; CHECK-BE-NEXT:    cset w5, hi
+; CHECK-BE-NEXT:    cmp x16, #0
+; CHECK-BE-NEXT:    cset w6, ne
+; CHECK-BE-NEXT:    csel w5, w5, w6, eq
+; CHECK-BE-NEXT:    cmp x2, #0
+; CHECK-BE-NEXT:    cset w6, ne
+; CHECK-BE-NEXT:    cmp x3, #0
+; CHECK-BE-NEXT:    cset w7, ne
+; CHECK-BE-NEXT:    orr x3, x2, x3
+; CHECK-BE-NEXT:    csel w6, w6, w7, eq
+; CHECK-BE-NEXT:    cmp x3, #0
+; CHECK-BE-NEXT:    csel w12, w5, w6, eq
+; CHECK-BE-NEXT:    orr w3, w18, w12
+; CHECK-BE-NEXT:    and x12, x0, #0x1
+; CHECK-BE-NEXT:    cmp w3, #0
+; CHECK-BE-NEXT:    csel x0, xzr, x13, ne
+; CHECK-BE-NEXT:    csel x18, xzr, x14, ne
+; CHECK-BE-NEXT:    csel x13, xzr, x15, ne
+; CHECK-BE-NEXT:    and x0, x0, #0x1
+; CHECK-BE-NEXT:    tbnz w3, #0, .LBB2_7
+; CHECK-BE-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-BE-NEXT:    and x2, x2, #0x1
+; CHECK-BE-NEXT:    eor x3, x17, #0x80
+; CHECK-BE-NEXT:    orr x3, x3, x2
+; CHECK-BE-NEXT:    orr x3, x3, x16
+; CHECK-BE-NEXT:    cbz x3, .LBB2_7
+; CHECK-BE-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-BE-NEXT:    mvn w18, w17
+; CHECK-BE-NEXT:    mov w3, #128
+; CHECK-BE-NEXT:    adds x13, x17, #1
+; CHECK-BE-NEXT:    lsl x4, x15, #1
+; CHECK-BE-NEXT:    sub x3, x3, x17
+; CHECK-BE-NEXT:    lsr x0, x14, x17
+; CHECK-BE-NEXT:    adcs x16, x16, xzr
+; CHECK-BE-NEXT:    lsl x18, x4, x18
+; CHECK-BE-NEXT:    cinc x2, x2, hs
+; CHECK-BE-NEXT:    orr x18, x18, x0
+; CHECK-BE-NEXT:    lsr x0, x15, x17
+; CHECK-BE-NEXT:    tst x17, #0x40
+; CHECK-BE-NEXT:    csel x0, x0, x18, ne
+; CHECK-BE-NEXT:    lsl x18, x1, x3
+; CHECK-BE-NEXT:    tst x3, #0x40
+; CHECK-BE-NEXT:    mvn w6, w3
+; CHECK-BE-NEXT:    csel x5, xzr, x18, ne
+; CHECK-BE-NEXT:    neg x17, x17
+; CHECK-BE-NEXT:    orr w0, w5, w0
+; CHECK-BE-NEXT:    lsr x5, x14, #1
+; CHECK-BE-NEXT:    and x18, x2, #0x1
+; CHECK-BE-NEXT:    lsl x2, x15, x3
+; CHECK-BE-NEXT:    lsr x5, x5, x6
+; CHECK-BE-NEXT:    lsl x6, x14, x17
+; CHECK-BE-NEXT:    orr x2, x2, x5
+; CHECK-BE-NEXT:    lsl x5, x14, x3
+; CHECK-BE-NEXT:    csel x2, x5, x2, ne
+; CHECK-BE-NEXT:    csel x5, xzr, x5, ne
+; CHECK-BE-NEXT:    tst x17, #0x40
+; CHECK-BE-NEXT:    csel x17, xzr, x6, ne
+; CHECK-BE-NEXT:    cmp x3, #128
+; CHECK-BE-NEXT:    csel x6, x0, x17, lo
+; CHECK-BE-NEXT:    csel x17, x5, xzr, lo
+; CHECK-BE-NEXT:    csel x0, x2, xzr, lo
+; CHECK-BE-NEXT:    orr x2, x13, x18
+; CHECK-BE-NEXT:    cmp x3, #0
+; CHECK-BE-NEXT:    orr x2, x2, x16
+; CHECK-BE-NEXT:    csel x5, x1, x6, eq
+; CHECK-BE-NEXT:    cbz x2, .LBB2_5
+; CHECK-BE-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-BE-NEXT:    mvn w6, w13
+; CHECK-BE-NEXT:    lsr x7, x14, x13
+; CHECK-BE-NEXT:    lsr x19, x15, x13
+; CHECK-BE-NEXT:    tst x13, #0x40
+; CHECK-BE-NEXT:    and x1, x1, #0x1
+; CHECK-BE-NEXT:    mov x3, xzr
+; CHECK-BE-NEXT:    lsl x4, x4, x6
+; CHECK-BE-NEXT:    mov w6, #128
+; CHECK-BE-NEXT:    orr x4, x4, x7
+; CHECK-BE-NEXT:    sub x6, x6, x13
+; CHECK-BE-NEXT:    csel x7, xzr, x19, ne
+; CHECK-BE-NEXT:    csel x4, x19, x4, ne
+; CHECK-BE-NEXT:    lsr x19, x1, x13
+; CHECK-BE-NEXT:    mov x2, xzr
+; CHECK-BE-NEXT:    csel x19, xzr, x19, ne
+; CHECK-BE-NEXT:    cmp x13, #128
+; CHECK-BE-NEXT:    csel x20, x19, xzr, lo
+; CHECK-BE-NEXT:    lsl x19, x1, x6
+; CHECK-BE-NEXT:    tst x6, #0x40
+; CHECK-BE-NEXT:    csel x6, xzr, x19, ne
+; CHECK-BE-NEXT:    csel x19, x19, xzr, ne
+; CHECK-BE-NEXT:    subs x21, x13, #128
+; CHECK-BE-NEXT:    orr x7, x7, x19
+; CHECK-BE-NEXT:    csel x19, x7, xzr, lo
+; CHECK-BE-NEXT:    tst x21, #0x40
+; CHECK-BE-NEXT:    orr x4, x4, x6
+; CHECK-BE-NEXT:    lsr x1, x1, x21
+; CHECK-BE-NEXT:    and x21, x5, #0x1
+; CHECK-BE-NEXT:    csel x1, xzr, x1, ne
+; CHECK-BE-NEXT:    cmp x13, #128
+; CHECK-BE-NEXT:    csel x1, x4, x1, lo
+; CHECK-BE-NEXT:    cmp x13, #0
+; CHECK-BE-NEXT:    csel x7, x14, x1, eq
+; CHECK-BE-NEXT:    csel x19, x15, x19, eq
+; CHECK-BE-NEXT:    subs x14, x8, #1
+; CHECK-BE-NEXT:    mov x15, #-1
+; CHECK-BE-NEXT:    adcs x1, x9, x15
+; CHECK-BE-NEXT:    mov w4, #1
+; CHECK-BE-NEXT:    adc x6, x10, x4
+; CHECK-BE-NEXT:    and x6, x6, #0x1
+; CHECK-BE-NEXT:  .LBB2_4: // %udiv-do-while
+; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT:    extr x22, x19, x7, #63
+; CHECK-BE-NEXT:    extr x20, x20, x19, #63
+; CHECK-BE-NEXT:    bfi x21, x7, #1, #63
+; CHECK-BE-NEXT:    extr x23, x0, x17, #63
+; CHECK-BE-NEXT:    cmp x14, x21
+; CHECK-BE-NEXT:    lsr x24, x0, #63
+; CHECK-BE-NEXT:    sbcs xzr, x1, x22
+; CHECK-BE-NEXT:    orr x17, x2, x17, lsl #1
+; CHECK-BE-NEXT:    sbc x7, x6, x20
+; CHECK-BE-NEXT:    orr w2, w3, w24
+; CHECK-BE-NEXT:    sbfx x25, x7, #0, #1
+; CHECK-BE-NEXT:    mov x5, xzr
+; CHECK-BE-NEXT:    and x0, x25, x8
+; CHECK-BE-NEXT:    subs x7, x21, x0
+; CHECK-BE-NEXT:    and x0, x25, x9
+; CHECK-BE-NEXT:    sbcs x19, x22, x0
+; CHECK-BE-NEXT:    and x0, x25, x10
+; CHECK-BE-NEXT:    sbc x20, x20, x0
+; CHECK-BE-NEXT:    subs x13, x13, #1
+; CHECK-BE-NEXT:    adcs x16, x16, x15
+; CHECK-BE-NEXT:    orr x0, x3, x23
+; CHECK-BE-NEXT:    adc x18, x18, x4
+; CHECK-BE-NEXT:    and x21, x2, #0x1
+; CHECK-BE-NEXT:    and x18, x18, #0x1
+; CHECK-BE-NEXT:    and x2, x25, #0x1
+; CHECK-BE-NEXT:    orr x3, x13, x18
+; CHECK-BE-NEXT:    and x20, x20, #0x1
+; CHECK-BE-NEXT:    orr x22, x3, x16
+; CHECK-BE-NEXT:    mov x3, xzr
+; CHECK-BE-NEXT:    cbnz x22, .LBB2_4
+; CHECK-BE-NEXT:    b .LBB2_6
+; CHECK-BE-NEXT:  .LBB2_5:
+; CHECK-BE-NEXT:    mov x5, xzr
+; CHECK-BE-NEXT:  .LBB2_6: // %udiv-loop-exit
+; CHECK-BE-NEXT:    extr x8, x0, x17, #63
+; CHECK-BE-NEXT:    lsr x9, x0, #63
+; CHECK-BE-NEXT:    orr w9, w5, w9
+; CHECK-BE-NEXT:    orr x18, x2, x17, lsl #1
+; CHECK-BE-NEXT:    and x0, x9, #0x1
+; CHECK-BE-NEXT:    orr x13, x5, x8
+; CHECK-BE-NEXT:  .LBB2_7: // %udiv-end
+; CHECK-BE-NEXT:    eor x8, x18, x11
+; CHECK-BE-NEXT:    eor x9, x13, x11
+; CHECK-BE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    subs x2, x8, x11
+; CHECK-BE-NEXT:    eor x8, x0, x12
+; CHECK-BE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    sbcs x1, x9, x11
+; CHECK-BE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    sbc x8, x8, x12
+; CHECK-BE-NEXT:    and x0, x8, #0x1
+; CHECK-BE-NEXT:    ldr x25, [sp], #64 // 8-byte Folded Reload
+; CHECK-BE-NEXT:    ret
+  %res = sdiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @srem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: srem129:
+; CHECK:       // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT:    sbfx x8, x2, #0, #1
+; CHECK-NEXT:    sbfx x13, x6, #0, #1
+; CHECK-NEXT:    eor x9, x0, x8
+; CHECK-NEXT:    eor x10, x1, x8
+; CHECK-NEXT:    subs x9, x9, x8
+; CHECK-NEXT:    eor x11, x2, x8
+; CHECK-NEXT:    eor x14, x4, x13
+; CHECK-NEXT:    sbcs x10, x10, x8
+; CHECK-NEXT:    eor x12, x5, x13
+; CHECK-NEXT:    sbc x16, x11, x8
+; CHECK-NEXT:    subs x11, x14, x13
+; CHECK-NEXT:    eor x14, x6, x13
+; CHECK-NEXT:    sbcs x12, x12, x13
+; CHECK-NEXT:    clz x15, x11
+; CHECK-NEXT:    sbc x14, x14, x13
+; CHECK-NEXT:    clz x13, x12
+; CHECK-NEXT:    add x15, x15, #64
+; CHECK-NEXT:    cmp x12, #0
+; CHECK-NEXT:    clz x17, x9
+; CHECK-NEXT:    csel x15, x13, x15, ne
+; CHECK-NEXT:    clz x13, x10
+; CHECK-NEXT:    add x17, x17, #64
+; CHECK-NEXT:    cmp x10, #0
+; CHECK-NEXT:    add x15, x15, #128
+; CHECK-NEXT:    csel x17, x13, x17, ne
+; CHECK-NEXT:    ands x13, x16, #0x1
+; CHECK-NEXT:    clz x18, x13
+; CHECK-NEXT:    add x17, x17, #128
+; CHECK-NEXT:    add x18, x18, #64
+; CHECK-NEXT:    csel x17, x18, x17, ne
+; CHECK-NEXT:    ands x14, x14, #0x1
+; CHECK-NEXT:    clz x0, x14
+; CHECK-NEXT:    orr x18, x11, x14
+; CHECK-NEXT:    add x0, x0, #64
+; CHECK-NEXT:    orr x18, x18, x12
+; CHECK-NEXT:    csel x15, x0, x15, ne
+; CHECK-NEXT:    orr x0, x9, x13
+; CHECK-NEXT:    cmp x18, #0
+; CHECK-NEXT:    orr x18, x0, x10
+; CHECK-NEXT:    ccmp x18, #0, #4, ne
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    subs x15, x15, #127
+; CHECK-NEXT:    ngcs x1, xzr
+; CHECK-NEXT:    ngcs x3, xzr
+; CHECK-NEXT:    ngc x4, xzr
+; CHECK-NEXT:    subs x17, x17, #127
+; CHECK-NEXT:    ngcs x5, xzr
+; CHECK-NEXT:    ngcs x6, xzr
+; CHECK-NEXT:    ngc x7, xzr
+; CHECK-NEXT:    subs x18, x15, x17
+; CHECK-NEXT:    sbcs x17, x1, x5
+; CHECK-NEXT:    sbcs x3, x3, x6
+; CHECK-NEXT:    sbc x15, x4, x7
+; CHECK-NEXT:    cmp x18, #128
+; CHECK-NEXT:    cset w1, hi
+; CHECK-NEXT:    cmp x17, #0
+; CHECK-NEXT:    cset w4, ne
+; CHECK-NEXT:    csel w1, w1, w4, eq
+; CHECK-NEXT:    cmp x3, #0
+; CHECK-NEXT:    cset w4, ne
+; CHECK-NEXT:    cmp x15, #0
+; CHECK-NEXT:    cset w5, ne
+; CHECK-NEXT:    orr x15, x3, x15
+; CHECK-NEXT:    csel w4, w4, w5, eq
+; CHECK-NEXT:    cmp x15, #0
+; CHECK-NEXT:    csel w15, w1, w4, eq
+; CHECK-NEXT:    orr w4, w0, w15
+; CHECK-NEXT:    and x15, x2, #0x1
+; CHECK-NEXT:    cmp w4, #0
+; CHECK-NEXT:    csel x1, xzr, x16, ne
+; CHECK-NEXT:    csel x0, xzr, x10, ne
+; CHECK-NEXT:    csel x16, xzr, x9, ne
+; CHECK-NEXT:    and x1, x1, #0x1
+; CHECK-NEXT:    tbnz w4, #0, .LBB3_7
+; CHECK-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-NEXT:    and x2, x3, #0x1
+; CHECK-NEXT:    eor x3, x18, #0x80
+; CHECK-NEXT:    orr x3, x3, x2
+; CHECK-NEXT:    orr x3, x3, x17
+; CHECK-NEXT:    cbz x3, .LBB3_7
+; CHECK-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-NEXT:    str x25, [sp, #-64]! // 8-byte Folded Spill
+; CHECK-NEXT:    mvn w0, w18
+; CHECK-NEXT:    mov w3, #128
+; CHECK-NEXT:    adds x16, x18, #1
+; CHECK-NEXT:    lsl x4, x10, #1
+; CHECK-NEXT:    sub x3, x3, x18
+; CHECK-NEXT:    lsr x1, x9, x18
+; CHECK-NEXT:    adcs x17, x17, xzr
+; CHECK-NEXT:    lsl x0, x4, x0
+; CHECK-NEXT:    cinc x2, x2, hs
+; CHECK-NEXT:    orr x0, x0, x1
+; CHECK-NEXT:    lsr x1, x10, x18
+; CHECK-NEXT:    tst x18, #0x40
+; CHECK-NEXT:    csel x1, x1, x0, ne
+; CHECK-NEXT:    lsl x0, x13, x3
+; CHECK-NEXT:    tst x3, #0x40
+; CHECK-NEXT:    mvn w6, w3
+; CHECK-NEXT:    csel x5, xzr, x0, ne
+; CHECK-NEXT:    neg x18, x18
+; CHECK-NEXT:    orr w1, w5, w1
+; CHECK-NEXT:    lsr x5, x9, #1
+; CHECK-NEXT:    and x0, x2, #0x1
+; CHECK-NEXT:    lsl x2, x10, x3
+; CHECK-NEXT:    lsr x5, x5, x6
+; CHECK-NEXT:    lsl x6, x9, x18
+; CHECK-NEXT:    orr x2, x2, x5
+; CHECK-NEXT:    lsl x5, x9, x3
+; CHECK-NEXT:    csel x2, x5, x2, ne
+; CHECK-NEXT:    csel x5, xzr, x5, ne
+; CHECK-NEXT:    tst x18, #0x40
+; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    csel x18, xzr, x6, ne
+; CHECK-NEXT:    cmp x3, #128
+; CHECK-NEXT:    csel x6, x1, x18, lo
+; CHECK-NEXT:    csel x18, x5, xzr, lo
+; CHECK-NEXT:    csel x1, x2, xzr, lo
+; CHECK-NEXT:    orr x2, x16, x0
+; CHECK-NEXT:    cmp x3, #0
+; CHECK-NEXT:    orr x2, x2, x17
+; CHECK-NEXT:    csel x19, x13, x6, eq
+; CHECK-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    cbz x2, .LBB3_5
+; CHECK-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-NEXT:    mvn w5, w16
+; CHECK-NEXT:    mov w20, #128
+; CHECK-NEXT:    lsr x6, x9, x16
+; CHECK-NEXT:    lsr x7, x10, x16
+; CHECK-NEXT:    tst x16, #0x40
+; CHECK-NEXT:    mov x2, xzr
+; CHECK-NEXT:    lsl x4, x4, x5
+; CHECK-NEXT:    sub x5, x20, x16
+; CHECK-NEXT:    orr x4, x4, x6
+; CHECK-NEXT:    csel x6, xzr, x7, ne
+; CHECK-NEXT:    csel x4, x7, x4, ne
+; CHECK-NEXT:    and x7, x13, #0x1
+; CHECK-NEXT:    lsl x20, x7, x5
+; CHECK-NEXT:    tst x5, #0x40
+; CHECK-NEXT:    csel x5, xzr, x20, ne
+; CHECK-NEXT:    csel x20, x20, xzr, ne
+; CHECK-NEXT:    subs x21, x16, #128
+; CHECK-NEXT:    orr x6, x6, x20
+; CHECK-NEXT:    csel x6, x6, xzr, lo
+; CHECK-NEXT:    tst x21, #0x40
+; CHECK-NEXT:    orr x4, x4, x5
+; CHECK-NEXT:    mov x5, #-1
+; CHECK-NEXT:    lsr x7, x7, x21
+; CHECK-NEXT:    mov x3, xzr
+; CHECK-NEXT:    csel x7, xzr, x7, ne
+; CHECK-NEXT:    cmp x16, #128
+; CHECK-NEXT:    csel x4, x4, x7, lo
+; CHECK-NEXT:    cmp x16, #0
+; CHECK-NEXT:    csel x22, x9, x4, eq
+; CHECK-NEXT:    csel x21, x10, x6, eq
+; CHECK-NEXT:    subs x4, x11, #1
+; CHECK-NEXT:    mov w7, #1
+; CHECK-NEXT:    adcs x6, x12, x5
+; CHECK-NEXT:    and x23, x19, #0x1
+; CHECK-NEXT:    adc x20, x14, x7
+; CHECK-NEXT:    and x20, x20, #0x1
+; CHECK-NEXT:  .LBB3_4: // %udiv-do-while
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    extr x24, x21, x22, #63
+; CHECK-NEXT:    bfi x23, x22, #1, #63
+; CHECK-NEXT:    cmp x4, x23
+; CHECK-NEXT:    lsr x21, x21, #63
+; CHECK-NEXT:    mov x19, xzr
+; CHECK-NEXT:    sbcs xzr, x6, x24
+; CHECK-NEXT:    sbc x21, x20, x21
+; CHECK-NEXT:    sbfx x25, x21, #0, #1
+; CHECK-NEXT:    and x21, x25, x11
+; CHECK-NEXT:    subs x22, x23, x21
+; CHECK-NEXT:    and x21, x25, x12
+; CHECK-NEXT:    sbcs x21, x24, x21
+; CHECK-NEXT:    extr x23, x1, x18, #63
+; CHECK-NEXT:    subs x16, x16, #1
+; CHECK-NEXT:    lsr x1, x1, #63
+; CHECK-NEXT:    adcs x17, x17, x5
+; CHECK-NEXT:    orr w1, w3, w1
+; CHECK-NEXT:    adc x0, x0, x7
+; CHECK-NEXT:    and x24, x1, #0x1
+; CHECK-NEXT:    and x0, x0, #0x1
+; CHECK-NEXT:    orr x1, x3, x23
+; CHECK-NEXT:    orr x3, x16, x0
+; CHECK-NEXT:    orr x18, x2, x18, lsl #1
+; CHECK-NEXT:    and x2, x25, #0x1
+; CHECK-NEXT:    orr x25, x3, x17
+; CHECK-NEXT:    mov x23, x24
+; CHECK-NEXT:    mov x3, xzr
+; CHECK-NEXT:    cbnz x25, .LBB3_4
+; CHECK-NEXT:    b .LBB3_6
+; CHECK-NEXT:  .LBB3_5:
+; CHECK-NEXT:    mov x19, xzr
+; CHECK-NEXT:  .LBB3_6: // %udiv-loop-exit
+; CHECK-NEXT:    extr x17, x1, x18, #63
+; CHECK-NEXT:    lsr x0, x1, #63
+; CHECK-NEXT:    orr x16, x2, x18, lsl #1
+; CHECK-NEXT:    orr w18, w19, w0
+; CHECK-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    orr x0, x19, x17
+; CHECK-NEXT:    and x1, x18, #0x1
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x25, [sp], #64 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB3_7: // %udiv-end
+; CHECK-NEXT:    umulh x18, x11, x16
+; CHECK-NEXT:    mul x2, x12, x16
+; CHECK-NEXT:    umulh x17, x12, x16
+; CHECK-NEXT:    mul x3, x11, x0
+; CHECK-NEXT:    adds x18, x2, x18
+; CHECK-NEXT:    umulh x2, x11, x0
+; CHECK-NEXT:    cinc x17, x17, hs
+; CHECK-NEXT:    adds x18, x3, x18
+; CHECK-NEXT:    mul x14, x16, x14
+; CHECK-NEXT:    adc x17, x17, x2
+; CHECK-NEXT:    mul x16, x11, x16
+; CHECK-NEXT:    madd x12, x12, x0, x17
+; CHECK-NEXT:    madd x11, x1, x11, x14
+; CHECK-NEXT:    subs x9, x9, x16
+; CHECK-NEXT:    sbcs x10, x10, x18
+; CHECK-NEXT:    eor x9, x9, x8
+; CHECK-NEXT:    add x11, x12, x11
+; CHECK-NEXT:    eor x10, x10, x8
+; CHECK-NEXT:    sbc x11, x13, x11
+; CHECK-NEXT:    subs x0, x9, x8
+; CHECK-NEXT:    eor x9, x11, x15
+; CHECK-NEXT:    sbcs x1, x10, x8
+; CHECK-NEXT:    sbc x8, x9, x15
+; CHECK-NEXT:    and x2, x8, #0x1
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: srem129:
+; CHECK-BE:       // %bb.0: // %_udiv-special-cases
+; CHECK-BE-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-BE-NEXT:    sbfx x13, x4, #0, #1
+; CHECK-BE-NEXT:    eor x9, x2, x8
+; CHECK-BE-NEXT:    eor x10, x1, x8
+; CHECK-BE-NEXT:    subs x9, x9, x8
+; CHECK-BE-NEXT:    eor x11, x0, x8
+; CHECK-BE-NEXT:    eor x14, x6, x13
+; CHECK-BE-NEXT:    sbcs x10, x10, x8
+; CHECK-BE-NEXT:    eor x12, x5, x13
+; CHECK-BE-NEXT:    sbc x1, x11, x8
+; CHECK-BE-NEXT:    subs x11, x14, x13
+; CHECK-BE-NEXT:    eor x14, x4, x13
+; CHECK-BE-NEXT:    sbcs x12, x12, x13
+; CHECK-BE-NEXT:    clz x15, x11
+; CHECK-BE-NEXT:    sbc x14, x14, x13
+; CHECK-BE-NEXT:    clz x13, x12
+; CHECK-BE-NEXT:    add x15, x15, #64
+; CHECK-BE-NEXT:    cmp x12, #0
+; CHECK-BE-NEXT:    clz x16, x9
+; CHECK-BE-NEXT:    csel x15, x13, x15, ne
+; CHECK-BE-NEXT:    clz x13, x10
+; CHECK-BE-NEXT:    add x16, x16, #64
+; CHECK-BE-NEXT:    cmp x10, #0
+; CHECK-BE-NEXT:    add x15, x15, #128
+; CHECK-BE-NEXT:    csel x16, x13, x16, ne
+; CHECK-BE-NEXT:    ands x13, x1, #0x1
+; CHECK-BE-NEXT:    clz x17, x13
+; CHECK-BE-NEXT:    add x16, x16, #128
+; CHECK-BE-NEXT:    add x17, x17, #64
+; CHECK-BE-NEXT:    csel x16, x17, x16, ne
+; CHECK-BE-NEXT:    ands x14, x14, #0x1
+; CHECK-BE-NEXT:    clz x18, x14
+; CHECK-BE-NEXT:    orr x17, x11, x14
+; CHECK-BE-NEXT:    add x18, x18, #64
+; CHECK-BE-NEXT:    orr x17, x17, x12
+; CHECK-BE-NEXT:    csel x15, x18, x15, ne
+; CHECK-BE-NEXT:    orr x18, x9, x13
+; CHECK-BE-NEXT:    cmp x17, #0
+; CHECK-BE-NEXT:    orr x17, x18, x10
+; CHECK-BE-NEXT:    ccmp x17, #0, #4, ne
+; CHECK-BE-NEXT:    cset w3, eq
+; CHECK-BE-NEXT:    subs x15, x15, #127
+; CHECK-BE-NEXT:    ngcs x17, xzr
+; CHECK-BE-NEXT:    ngcs x2, xzr
+; CHECK-BE-NEXT:    ngc x4, xzr
+; CHECK-BE-NEXT:    subs x16, x16, #127
+; CHECK-BE-NEXT:    ngcs x5, xzr
+; CHECK-BE-NEXT:    ngcs x6, xzr
+; CHECK-BE-NEXT:    ngc x7, xzr
+; CHECK-BE-NEXT:    subs x18, x15, x16
+; CHECK-BE-NEXT:    sbcs x17, x17, x5
+; CHECK-BE-NEXT:    sbcs x2, x2, x6
+; CHECK-BE-NEXT:    sbc x15, x4, x7
+; CHECK-BE-NEXT:    cmp x18, #128
+; CHECK-BE-NEXT:    cset w16, hi
+; CHECK-BE-NEXT:    cmp x17, #0
+; CHECK-BE-NEXT:    cset w4, ne
+; CHECK-BE-NEXT:    csel w16, w16, w4, eq
+; CHECK-BE-NEXT:    cmp x2, #0
+; CHECK-BE-NEXT:    cset w4, ne
+; CHECK-BE-NEXT:    cmp x15, #0
+; CHECK-BE-NEXT:    cset w5, ne
+; CHECK-BE-NEXT:    orr x15, x2, x15
+; CHECK-BE-NEXT:    csel w4, w4, w5, eq
+; CHECK-BE-NEXT:    cmp x15, #0
+; CHECK-BE-NEXT:    csel w15, w16, w4, eq
+; CHECK-BE-NEXT:    orr w3, w3, w15
+; CHECK-BE-NEXT:    and x15, x0, #0x1
+; CHECK-BE-NEXT:    cmp w3, #0
+; CHECK-BE-NEXT:    csel x0, xzr, x1, ne
+; CHECK-BE-NEXT:    csel x16, xzr, x9, ne
+; CHECK-BE-NEXT:    csel x1, xzr, x10, ne
+; CHECK-BE-NEXT:    and x0, x0, #0x1
+; CHECK-BE-NEXT:    tbnz w3, #0, .LBB3_7
+; CHECK-BE-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-BE-NEXT:    and x2, x2, #0x1
+; CHECK-BE-NEXT:    eor x3, x18, #0x80
+; CHECK-BE-NEXT:    orr x3, x3, x2
+; CHECK-BE-NEXT:    orr x3, x3, x17
+; CHECK-BE-NEXT:    cbz x3, .LBB3_7
+; CHECK-BE-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-BE-NEXT:    stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-BE-NEXT:    mvn w0, w18
+; CHECK-BE-NEXT:    mov w3, #128
+; CHECK-BE-NEXT:    adds x16, x18, #1
+; CHECK-BE-NEXT:    lsl x4, x10, #1
+; CHECK-BE-NEXT:    sub x3, x3, x18
+; CHECK-BE-NEXT:    lsr x1, x9, x18
+; CHECK-BE-NEXT:    adcs x17, x17, xzr
+; CHECK-BE-NEXT:    lsl x0, x4, x0
+; CHECK-BE-NEXT:    cinc x2, x2, hs
+; CHECK-BE-NEXT:    orr x0, x0, x1
+; CHECK-BE-NEXT:    lsr x1, x10, x18
+; CHECK-BE-NEXT:    tst x18, #0x40
+; CHECK-BE-NEXT:    csel x1, x1, x0, ne
+; CHECK-BE-NEXT:    lsl x0, x13, x3
+; CHECK-BE-NEXT:    tst x3, #0x40
+; CHECK-BE-NEXT:    mvn w6, w3
+; CHECK-BE-NEXT:    csel x5, xzr, x0, ne
+; CHECK-BE-NEXT:    neg x18, x18
+; CHECK-BE-NEXT:    orr w1, w5, w1
+; CHECK-BE-NEXT:    lsr x5, x9, #1
+; CHECK-BE-NEXT:    and x0, x2, #0x1
+; CHECK-BE-NEXT:    lsl x2, x10, x3
+; CHECK-BE-NEXT:    lsr x5, x5, x6
+; CHECK-BE-NEXT:    lsl x6, x9, x18
+; CHECK-BE-NEXT:    orr x2, x2, x5
+; CHECK-BE-NEXT:    lsl x5, x9, x3
+; CHECK-BE-NEXT:    csel x2, x5, x2, ne
+; CHECK-BE-NEXT:    csel x5, xzr, x5, ne
+; CHECK-BE-NEXT:    tst x18, #0x40
+; CHECK-BE-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    csel x18, xzr, x6, ne
+; CHECK-BE-NEXT:    cmp x3, #128
+; CHECK-BE-NEXT:    csel x6, x1, x18, lo
+; CHECK-BE-NEXT:    csel x18, x5, xzr, lo
+; CHECK-BE-NEXT:    csel x1, x2, xzr, lo
+; CHECK-BE-NEXT:    orr x2, x16, x0
+; CHECK-BE-NEXT:    cmp x3, #0
+; CHECK-BE-NEXT:    orr x2, x2, x17
+; CHECK-BE-NEXT:    csel x19, x13, x6, eq
+; CHECK-BE-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    cbz x2, .LBB3_5
+; CHECK-BE-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-BE-NEXT:    mvn w5, w16
+; CHECK-BE-NEXT:    lsr x6, x9, x16
+; CHECK-BE-NEXT:    lsr x7, x10, x16
+; CHECK-BE-NEXT:    tst x16, #0x40
+; CHECK-BE-NEXT:    and x20, x13, #0x1
+; CHECK-BE-NEXT:    mov x3, xzr
+; CHECK-BE-NEXT:    lsl x4, x4, x5
+; CHECK-BE-NEXT:    mov w5, #128
+; CHECK-BE-NEXT:    orr x4, x4, x6
+; CHECK-BE-NEXT:    sub x5, x5, x16
+; CHECK-BE-NEXT:    csel x6, xzr, x7, ne
+; CHECK-BE-NEXT:    csel x4, x7, x4, ne
+; CHECK-BE-NEXT:    lsr x7, x20, x16
+; CHECK-BE-NEXT:    mov x2, xzr
+; CHECK-BE-NEXT:    csel x7, xzr, x7, ne
+; CHECK-BE-NEXT:    cmp x16, #128
+; CHECK-BE-NEXT:    csel x23, x7, xzr, lo
+; CHECK-BE-NEXT:    lsl x7, x20, x5
+; CHECK-BE-NEXT:    tst x5, #0x40
+; CHECK-BE-NEXT:    and x24, x19, #0x1
+; CHECK-BE-NEXT:    csel x5, xzr, x7, ne
+; CHECK-BE-NEXT:    csel x7, x7, xzr, ne
+; CHECK-BE-NEXT:    subs x21, x16, #128
+; CHECK-BE-NEXT:    orr x6, x6, x7
+; CHECK-BE-NEXT:    csel x6, x6, xzr, lo
+; CHECK-BE-NEXT:    tst x21, #0x40
+; CHECK-BE-NEXT:    orr x4, x4, x5
+; CHECK-BE-NEXT:    mov x5, #-1
+; CHECK-BE-NEXT:    lsr x7, x20, x21
+; CHECK-BE-NEXT:    csel x7, xzr, x7, ne
+; CHECK-BE-NEXT:    cmp x16, #128
+; CHECK-BE-NEXT:    csel x4, x4, x7, lo
+; CHECK-BE-NEXT:    cmp x16, #0
+; CHECK-BE-NEXT:    csel x21, x9, x4, eq
+; CHECK-BE-NEXT:    csel x22, x10, x6, eq
+; CHECK-BE-NEXT:    subs x4, x11, #1
+; CHECK-BE-NEXT:    mov w7, #1
+; CHECK-BE-NEXT:    adcs x6, x12, x5
+; CHECK-BE-NEXT:    adc x20, x14, x7
+; CHECK-BE-NEXT:    and x20, x20, #0x1
+; CHECK-BE-NEXT:  .LBB3_4: // %udiv-do-while
+; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT:    extr x25, x22, x21, #63
+; CHECK-BE-NEXT:    extr x23, x23, x22, #63
+; CHECK-BE-NEXT:    bfi x24, x21, #1, #63
+; CHECK-BE-NEXT:    extr x26, x1, x18, #63
+; CHECK-BE-NEXT:    cmp x4, x24
+; CHECK-BE-NEXT:    lsr x27, x1, #63
+; CHECK-BE-NEXT:    sbcs xzr, x6, x25
+; CHECK-BE-NEXT:    orr x18, x2, x18, lsl #1
+; CHECK-BE-NEXT:    sbc x21, x20, x23
+; CHECK-BE-NEXT:    orr w2, w3, w27
+; CHECK-BE-NEXT:    sbfx x28, x21, #0, #1
+; CHECK-BE-NEXT:    mov x19, xzr
+; CHECK-BE-NEXT:    and x1, x28, x11
+; CHECK-BE-NEXT:    subs x21, x24, x1
+; CHECK-BE-NEXT:    and x1, x28, x12
+; CHECK-BE-NEXT:    sbcs x22, x25, x1
+; CHECK-BE-NEXT:    and x1, x28, x14
+; CHECK-BE-NEXT:    sbc x23, x23, x1
+; CHECK-BE-NEXT:    subs x16, x16, #1
+; CHECK-BE-NEXT:    adcs x17, x17, x5
+; CHECK-BE-NEXT:    orr x1, x3, x26
+; CHECK-BE-NEXT:    adc x0, x0, x7
+; CHECK-BE-NEXT:    and x24, x2, #0x1
+; CHECK-BE-NEXT:    and x0, x0, #0x1
+; CHECK-BE-NEXT:    and x2, x28, #0x1
+; CHECK-BE-NEXT:    orr x3, x16, x0
+; CHECK-BE-NEXT:    and x23, x23, #0x1
+; CHECK-BE-NEXT:    orr x25, x3, x17
+; CHECK-BE-NEXT:    mov x3, xzr
+; CHECK-BE-NEXT:    cbnz x25, .LBB3_4
+; CHECK-BE-NEXT:    b .LBB3_6
+; CHECK-BE-NEXT:  .LBB3_5:
+; CHECK-BE-NEXT:    mov x19, xzr
+; CHECK-BE-NEXT:  .LBB3_6: // %udiv-loop-exit
+; CHECK-BE-NEXT:    extr x17, x1, x18, #63
+; CHECK-BE-NEXT:    lsr x0, x1, #63
+; CHECK-BE-NEXT:    orr x16, x2, x18, lsl #1
+; CHECK-BE-NEXT:    orr w18, w19, w0
+; CHECK-BE-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    orr x1, x19, x17
+; CHECK-BE-NEXT:    and x0, x18, #0x1
+; CHECK-BE-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    ldp x24, x23, [sp, #32] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    ldp x26, x25, [sp, #16] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    ldp x28, x27, [sp], #80 // 16-byte Folded Reload
+; CHECK-BE-NEXT:  .LBB3_7: // %udiv-end
+; CHECK-BE-NEXT:    umulh x18, x11, x16
+; CHECK-BE-NEXT:    mul x2, x12, x16
+; CHECK-BE-NEXT:    umulh x17, x12, x16
+; CHECK-BE-NEXT:    mul x3, x11, x1
+; CHECK-BE-NEXT:    adds x18, x2, x18
+; CHECK-BE-NEXT:    umulh x2, x11, x1
+; CHECK-BE-NEXT:    cinc x17, x17, hs
+; CHECK-BE-NEXT:    adds x18, x3, x18
+; CHECK-BE-NEXT:    mul x14, x16, x14
+; CHECK-BE-NEXT:    adc x17, x17, x2
+; CHECK-BE-NEXT:    mul x16, x11, x16
+; CHECK-BE-NEXT:    madd x12, x12, x1, x17
+; CHECK-BE-NEXT:    madd x11, x0, x11, x14
+; CHECK-BE-NEXT:    subs x9, x9, x16
+; CHECK-BE-NEXT:    sbcs x10, x10, x18
+; CHECK-BE-NEXT:    eor x9, x9, x8
+; CHECK-BE-NEXT:    add x11, x12, x11
+; CHECK-BE-NEXT:    eor x10, x10, x8
+; CHECK-BE-NEXT:    sbc x11, x13, x11
+; CHECK-BE-NEXT:    subs x2, x9, x8
+; CHECK-BE-NEXT:    eor x9, x11, x15
+; CHECK-BE-NEXT:    sbcs x1, x10, x8
+; CHECK-BE-NEXT:    sbc x8, x9, x15
+; CHECK-BE-NEXT:    and x0, x8, #0x1
+; CHECK-BE-NEXT:    ret
+  %res = srem i129 %a, %b
+  ret i129 %res
+}
+
+; Check some larger bit widths as well.
+define i257 @sdiv257(i257 %a, i257 %b) nounwind {
+; CHECK-LABEL: sdiv257:
+; CHECK:       // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT:    sub sp, sp, #144
+; CHECK-NEXT:    stp x20, x19, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    sbfx x20, x4, #0, #1
+; CHECK-NEXT:    eor x9, x20, x0
+; CHECK-NEXT:    eor x10, x20, x1
+; CHECK-NEXT:    subs x14, x9, x20
+; CHECK-NEXT:    ldr x13, [sp, #160]
+; CHECK-NEXT:    eor x9, x20, x2
+; CHECK-NEXT:    sbcs x15, x10, x20
+; CHECK-NEXT:    eor x10, x20, x3
+; CHECK-NEXT:    sbcs x17, x9, x20
+; CHECK-NEXT:    sbcs x16, x10, x20
+; CHECK-NEXT:    sbfx x12, x13, #0, #1
+; CHECK-NEXT:    ldp x9, x10, [sp, #144]
+; CHECK-NEXT:    eor x8, x20, x4
+; CHECK-NEXT:    eor x11, x12, x7
+; CHECK-NEXT:    sbc x0, x8, x20
+; CHECK-NEXT:    eor x8, x12, x6
+; CHECK-NEXT:    subs x8, x8, x12
+; CHECK-NEXT:    eor x18, x12, x13
+; CHECK-NEXT:    eor x1, x12, x9
+; CHECK-NEXT:    sbcs x9, x11, x12
+; CHECK-NEXT:    eor x11, x12, x10
+; CHECK-NEXT:    sbcs x10, x1, x12
+; CHECK-NEXT:    sbcs x11, x11, x12
+; CHECK-NEXT:    clz x1, x10
+; CHECK-NEXT:    sbc x18, x18, x12
+; CHECK-NEXT:    add x1, x1, #64
+; CHECK-NEXT:    clz x2, x11
+; CHECK-NEXT:    cmp x11, #0
+; CHECK-NEXT:    clz x3, x8
+; CHECK-NEXT:    csel x1, x2, x1, ne
+; CHECK-NEXT:    clz x2, x9
+; CHECK-NEXT:    add x3, x3, #64
+; CHECK-NEXT:    cmp x9, #0
+; CHECK-NEXT:    clz x5, x14
+; CHECK-NEXT:    csel x2, x2, x3, ne
+; CHECK-NEXT:    orr x3, x10, x11
+; CHECK-NEXT:    add x2, x2, #128
+; CHECK-NEXT:    cmp x3, #0
+; CHECK-NEXT:    clz x3, x17
+; CHECK-NEXT:    csel x1, x1, x2, ne
+; CHECK-NEXT:    add x2, x3, #64
+; CHECK-NEXT:    clz x3, x16
+; CHECK-NEXT:    cmp x16, #0
+; CHECK-NEXT:    add x5, x5, #64
+; CHECK-NEXT:    csel x2, x3, x2, ne
+; CHECK-NEXT:    clz x3, x15
+; CHECK-NEXT:    cmp x15, #0
+; CHECK-NEXT:    add x1, x1, #256
+; CHECK-NEXT:    csel x3, x3, x5, ne
+; CHECK-NEXT:    orr x5, x17, x16
+; CHECK-NEXT:    add x3, x3, #128
+; CHECK-NEXT:    cmp x5, #0
+; CHECK-NEXT:    csel x2, x2, x3, ne
+; CHECK-NEXT:    ands x6, x0, #0x1
+; CHECK-NEXT:    clz x3, x6
+; CHECK-NEXT:    add x2, x2, #256
+; CHECK-NEXT:    add x3, x3, #192
+; CHECK-NEXT:    orr x5, x9, x11
+; CHECK-NEXT:    csel x2, x3, x2, ne
+; CHECK-NEXT:    ands x18, x18, #0x1
+; CHECK-NEXT:    orr x3, x8, x18
+; CHECK-NEXT:    stp x22, x21, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    orr x3, x3, x10
+; CHECK-NEXT:    stp x24, x23, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    orr x3, x3, x5
+; CHECK-NEXT:    clz x5, x18
+; CHECK-NEXT:    add x5, x5, #192
+; CHECK-NEXT:    stp x18, x6, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    csel x1, x5, x1, ne
+; CHECK-NEXT:    orr x5, x14, x6
+; CHECK-NEXT:    orr x6, x15, x16
+; CHECK-NEXT:    orr x5, x5, x17
+; CHECK-NEXT:    cmp x3, #0
+; CHECK-NEXT:    orr x3, x5, x6
+; CHECK-NEXT:    ccmp x3, #0, #4, ne
+; CHECK-NEXT:    stp x26, x25, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    eor w13, w13, w4
+; CHECK-NEXT:    stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    cset w3, eq
+; CHECK-NEXT:    subs x1, x1, #255
+; CHECK-NEXT:    ngcs x6, xzr
+; CHECK-NEXT:    ngcs x7, xzr
+; CHECK-NEXT:    ngcs x19, xzr
+; CHECK-NEXT:    ngcs x22, xzr
+; CHECK-NEXT:    ngcs x23, xzr
+; CHECK-NEXT:    ngcs x24, xzr
+; CHECK-NEXT:    ngc x25, xzr
+; CHECK-NEXT:    subs x2, x2, #255
+; CHECK-NEXT:    ngcs x26, xzr
+; CHECK-NEXT:    ngcs x27, xzr
+; CHECK-NEXT:    ngcs x28, xzr
+; CHECK-NEXT:    ngcs x29, xzr
+; CHECK-NEXT:    ngcs x30, xzr
+; CHECK-NEXT:    ngcs x18, xzr
+; CHECK-NEXT:    ngc x21, xzr
+; CHECK-NEXT:    subs x5, x1, x2
+; CHECK-NEXT:    sbcs x6, x6, x26
+; CHECK-NEXT:    sbcs x7, x7, x27
+; CHECK-NEXT:    eor x27, x12, x20
+; CHECK-NEXT:    sbcs x19, x19, x28
+; CHECK-NEXT:    and x28, x13, #0x1
+; CHECK-NEXT:    sbcs x22, x22, x29
+; CHECK-NEXT:    sbcs x1, x23, x30
+; CHECK-NEXT:    sbcs x18, x24, x18
+; CHECK-NEXT:    sbc x2, x25, x21
+; CHECK-NEXT:    cmp x5, #256
+; CHECK-NEXT:    cset w21, hi
+; CHECK-NEXT:    cmp x6, #0
+; CHECK-NEXT:    cset w23, ne
+; CHECK-NEXT:    orr x25, x7, x19
+; CHECK-NEXT:    csel w21, w21, w23, eq
+; CHECK-NEXT:    cmp x7, #0
+; CHECK-NEXT:    cset w23, ne
+; CHECK-NEXT:    cmp x19, #0
+; CHECK-NEXT:    cset w24, ne
+; CHECK-NEXT:    csel w23, w23, w24, eq
+; CHECK-NEXT:    cmp x25, #0
+; CHECK-NEXT:    csel w21, w21, w23, eq
+; CHECK-NEXT:    cmp x22, #0
+; CHECK-NEXT:    cset w23, ne
+; CHECK-NEXT:    cmp x1, #0
+; CHECK-NEXT:    cset w24, ne
+; CHECK-NEXT:    orr x25, x18, x2
+; CHECK-NEXT:    csel w23, w23, w24, eq
+; CHECK-NEXT:    cmp x18, #0
+; CHECK-NEXT:    cset w24, ne
+; CHECK-NEXT:    cmp x2, #0
+; CHECK-NEXT:    cset w26, ne
+; CHECK-NEXT:    orr x18, x22, x18
+; CHECK-NEXT:    orr x1, x1, x2
+; CHECK-NEXT:    csel w24, w24, w26, eq
+; CHECK-NEXT:    cmp x25, #0
+; CHECK-NEXT:    orr x18, x18, x1
+; CHECK-NEXT:    csel w1, w23, w24, eq
+; CHECK-NEXT:    cmp x18, #0
+; CHECK-NEXT:    csel w18, w21, w1, eq
+; CHECK-NEXT:    orr w18, w3, w18
+; CHECK-NEXT:    cmp w18, #0
+; CHECK-NEXT:    csel x4, xzr, x0, ne
+; CHECK-NEXT:    csel x2, xzr, x16, ne
+; CHECK-NEXT:    csel x1, xzr, x17, ne
+; CHECK-NEXT:    csel x3, xzr, x15, ne
+; CHECK-NEXT:    csel x0, xzr, x14, ne
+; CHECK-NEXT:    and x4, x4, #0x1
+; CHECK-NEXT:    tbnz w18, #0, .LBB4_7
+; CHECK-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-NEXT:    and x20, x22, #0x1
+; CHECK-NEXT:    eor x18, x5, #0x100
+; CHECK-NEXT:    orr x18, x18, x20
+; CHECK-NEXT:    orr x21, x6, x19
+; CHECK-NEXT:    orr x18, x18, x7
+; CHECK-NEXT:    orr x18, x18, x21
+; CHECK-NEXT:    cbz x18, .LBB4_7
+; CHECK-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-NEXT:    stp x9, x8, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    mov w8, #256
+; CHECK-NEXT:    mov w9, #128
+; CHECK-NEXT:    sub x4, x8, x5
+; CHECK-NEXT:    sub x8, x9, x5
+; CHECK-NEXT:    sub x9, x9, x4
+; CHECK-NEXT:    mvn w13, w9
+; CHECK-NEXT:    lsl x23, x16, #1
+; CHECK-NEXT:    mvn w12, w8
+; CHECK-NEXT:    tst x9, #0x40
+; CHECK-NEXT:    lsr x1, x17, x9
+; CHECK-NEXT:    lsr x2, x16, x9
+; CHECK-NEXT:    lsl x3, x23, x13
+; CHECK-NEXT:    lsr x0, x15, x9
+; CHECK-NEXT:    orr x1, x3, x1
+; CHECK-NEXT:    lsl x22, x15, x8
+; CHECK-NEXT:    csel x1, x2, x1, ne
+; CHECK-NEXT:    lsr x2, x14, #1
+; CHECK-NEXT:    lsr x12, x2, x12
+; CHECK-NEXT:    lsl x18, x14, x8
+; CHECK-NEXT:    csel x21, xzr, x0, ne
+; CHECK-NEXT:    tst x8, #0x40
+; CHECK-NEXT:    orr x12, x22, x12
+; CHECK-NEXT:    csel x3, xzr, x18, ne
+; CHECK-NEXT:    csel x12, x18, x12, ne
+; CHECK-NEXT:    mvn w18, w5
+; CHECK-NEXT:    lsl x25, x15, #1
+; CHECK-NEXT:    lsl x8, x17, x8
+; CHECK-NEXT:    lsr x22, x14, x9
+; CHECK-NEXT:    lsl x13, x25, x13
+; CHECK-NEXT:    csel x8, xzr, x8, ne
+; CHECK-NEXT:    tst x9, #0x40
+; CHECK-NEXT:    orr x9, x13, x22
+; CHECK-NEXT:    lsr x13, x14, x5
+; CHECK-NEXT:    lsl x18, x25, x18
+; CHECK-NEXT:    csel x9, x0, x9, ne
+; CHECK-NEXT:    orr x13, x18, x13
+; CHECK-NEXT:    lsr x18, x15, x5
+; CHECK-NEXT:    tst x5, #0x40
+; CHECK-NEXT:    lsr x26, x17, #1
+; CHECK-NEXT:    csel x13, x18, x13, ne
+; CHECK-NEXT:    cmp x5, #128
+; CHECK-NEXT:    orr w8, w13, w8
+; CHECK-NEXT:    mvn w13, w4
+; CHECK-NEXT:    csel w8, w8, w1, lo
+; CHECK-NEXT:    cmp x5, #0
+; CHECK-NEXT:    lsl x18, x16, x4
+; CHECK-NEXT:    csel w8, w14, w8, eq
+; CHECK-NEXT:    lsr x0, x26, x13
+; CHECK-NEXT:    tst x4, #0x40
+; CHECK-NEXT:    orr x18, x18, x0
+; CHECK-NEXT:    lsl x0, x17, x4
+; CHECK-NEXT:    ldr x29, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT:    csel x18, x0, x18, ne
+; CHECK-NEXT:    orr x18, x18, x21
+; CHECK-NEXT:    lsl x21, x15, x4
+; CHECK-NEXT:    lsr x13, x2, x13
+; CHECK-NEXT:    csel x0, xzr, x0, ne
+; CHECK-NEXT:    lsl x1, x29, x4
+; CHECK-NEXT:    orr x13, x21, x13
+; CHECK-NEXT:    lsl x2, x14, x4
+; CHECK-NEXT:    orr x9, x0, x9
+; CHECK-NEXT:    neg x0, x5
+; CHECK-NEXT:    csel x1, xzr, x1, ne
+; CHECK-NEXT:    csel x13, x2, x13, ne
+; CHECK-NEXT:    csel x2, xzr, x2, ne
+; CHECK-NEXT:    cmp x4, #128
+; CHECK-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    csel x12, x18, x12, lo
+; CHECK-NEXT:    csel x9, x9, x3, lo
+; CHECK-NEXT:    csel x18, x2, xzr, lo
+; CHECK-NEXT:    csel x13, x13, xzr, lo
+; CHECK-NEXT:    csel w2, w1, wzr, lo
+; CHECK-NEXT:    cmp x4, #0
+; CHECK-NEXT:    csel x9, x17, x9, eq
+; CHECK-NEXT:    csel x12, x16, x12, eq
+; CHECK-NEXT:    lsl x1, x14, x0
+; CHECK-NEXT:    tst x0, #0x40
+; CHECK-NEXT:    csel x1, xzr, x1, ne
+; CHECK-NEXT:    cmp x0, #128
+; CHECK-NEXT:    csel x0, x1, xzr, lo
+; CHECK-NEXT:    cmp x4, #256
+; CHECK-NEXT:    orr w8, w2, w8
+; CHECK-NEXT:    csel x1, x12, xzr, lo
+; CHECK-NEXT:    csel x3, x9, xzr, lo
+; CHECK-NEXT:    csel x8, x8, x0, lo
+; CHECK-NEXT:    csel x2, x13, xzr, lo
+; CHECK-NEXT:    csel x0, x18, xzr, lo
+; CHECK-NEXT:    cmp x4, #0
+; CHECK-NEXT:    csel x24, x29, x8, eq
+; CHECK-NEXT:    adds x4, x5, #1
+; CHECK-NEXT:    adcs x5, x6, xzr
+; CHECK-NEXT:    adcs x6, x7, xzr
+; CHECK-NEXT:    adcs x7, x19, xzr
+; CHECK-NEXT:    cinc x8, x20, hs
+; CHECK-NEXT:    orr x9, x5, x7
+; CHECK-NEXT:    and x19, x8, #0x1
+; CHECK-NEXT:    mov x20, xzr
+; CHECK-NEXT:    orr x8, x4, x19
+; CHECK-NEXT:    orr x8, x8, x6
+; CHECK-NEXT:    orr x8, x8, x9
+; CHECK-NEXT:    cbz x8, .LBB4_5
+; CHECK-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-NEXT:    mov w8, #128
+; CHECK-NEXT:    mvn w13, w4
+; CHECK-NEXT:    sub x9, x8, x4
+; CHECK-NEXT:    and x21, x29, #0x1
+; CHECK-NEXT:    mvn w12, w9
+; CHECK-NEXT:    tst x9, #0x40
+; CHECK-NEXT:    lsl x28, x23, x13
+; CHECK-NEXT:    lsr x29, x14, x4
+; CHECK-NEXT:    lsl x27, x16, x9
+; CHECK-NEXT:    lsl x18, x21, x9
+; CHECK-NEXT:    lsr x12, x26, x12
+; CHECK-NEXT:    lsl x9, x17, x9
+; CHECK-NEXT:    orr x12, x27, x12
+; CHECK-NEXT:    lsr x27, x17, x4
+; CHECK-NEXT:    lsl x13, x25, x13
+; CHECK-NEXT:    csel x26, x18, xzr, ne
+; CHECK-NEXT:    csel x18, xzr, x18, ne
+; CHECK-NEXT:    csel x12, x9, x12, ne
+; CHECK-NEXT:    csel x9, xzr, x9, ne
+; CHECK-NEXT:    orr x27, x28, x27
+; CHECK-NEXT:    lsr x28, x16, x4
+; CHECK-NEXT:    tst x4, #0x40
+; CHECK-NEXT:    orr x13, x13, x29
+; CHECK-NEXT:    lsr x25, x15, x4
+; CHECK-NEXT:    csel x27, x28, x27, ne
+; CHECK-NEXT:    csel x13, x25, x13, ne
+; CHECK-NEXT:    csel x25, xzr, x25, ne
+; CHECK-NEXT:    csel x28, xzr, x28, ne
+; CHECK-NEXT:    subs x29, x4, #128
+; CHECK-NEXT:    orr x9, x13, x9
+; CHECK-NEXT:    mvn w30, w29
+; CHECK-NEXT:    mov w13, #256
+; CHECK-NEXT:    sub x13, x13, x4
+; CHECK-NEXT:    csel x28, x28, xzr, lo
+; CHECK-NEXT:    tst x29, #0x40
+; CHECK-NEXT:    sub x8, x8, x13
+; CHECK-NEXT:    lsl x23, x23, x30
+; CHECK-NEXT:    lsr x30, x17, x29
+; CHECK-NEXT:    orr x23, x23, x30
+; CHECK-NEXT:    lsr x29, x16, x29
+; CHECK-NEXT:    csel x23, x29, x23, ne
+; CHECK-NEXT:    csel x29, xzr, x29, ne
+; CHECK-NEXT:    cmp x4, #128
+; CHECK-NEXT:    orr x12, x25, x12
+; CHECK-NEXT:    csel x9, x9, x23, lo
+; CHECK-NEXT:    csel x23, x27, xzr, lo
+; CHECK-NEXT:    csel x12, x12, x29, lo
+; CHECK-NEXT:    lsl x25, x21, x13
+; CHECK-NEXT:    tst x13, #0x40
+; CHECK-NEXT:    lsr x29, x21, x8
+; CHECK-NEXT:    csel x27, x25, xzr, ne
+; CHECK-NEXT:    csel x25, xzr, x25, ne
+; CHECK-NEXT:    tst x8, #0x40
+; CHECK-NEXT:    mov x22, xzr
+; CHECK-NEXT:    csel x8, xzr, x29, ne
+; CHECK-NEXT:    cmp x4, #0
+; CHECK-NEXT:    csel x9, x14, x9, eq
+; CHECK-NEXT:    csel x12, x15, x12, eq
+; CHECK-NEXT:    cmp x13, #128
+; CHECK-NEXT:    and x24, x24, #0x1
+; CHECK-NEXT:    csel x8, x8, x18, lo
+; CHECK-NEXT:    csel x18, x25, xzr, lo
+; CHECK-NEXT:    csel x25, x27, xzr, lo
+; CHECK-NEXT:    csel x26, xzr, x26, lo
+; CHECK-NEXT:    cmp x13, #0
+; CHECK-NEXT:    orr x12, x12, x25
+; CHECK-NEXT:    csel x8, xzr, x8, eq
+; CHECK-NEXT:    csel x13, xzr, x26, eq
+; CHECK-NEXT:    subs x26, x4, #256
+; CHECK-NEXT:    orr x8, x23, x8
+; CHECK-NEXT:    orr x13, x28, x13
+; CHECK-NEXT:    csel x8, x8, xzr, lo
+; CHECK-NEXT:    csel x13, x13, xzr, lo
+; CHECK-NEXT:    tst x26, #0x40
+; CHECK-NEXT:    lsr x23, x21, x26
+; CHECK-NEXT:    mov x25, x10
+; CHECK-NEXT:    csel x23, xzr, x23, ne
+; CHECK-NEXT:    cmp x26, #128
+; CHECK-NEXT:    csel x23, x23, xzr, lo
+; CHECK-NEXT:    cmp x26, #0
+; CHECK-NEXT:    ldr x10, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT:    csel x21, x21, x23, eq
+; CHECK-NEXT:    cmp x4, #256
+; CHECK-NEXT:    orr x9, x9, x18
+; CHECK-NEXT:    csel x12, x12, xzr, lo
+; CHECK-NEXT:    csel x9, x9, x21, lo
+; CHECK-NEXT:    cmp x4, #0
+; CHECK-NEXT:    csel x28, x17, x8, eq
+; CHECK-NEXT:    csel x27, x14, x9, eq
+; CHECK-NEXT:    csel x29, x15, x12, eq
+; CHECK-NEXT:    csel x26, x16, x13, eq
+; CHECK-NEXT:    subs x8, x10, #1
+; CHECK-NEXT:    mov x14, x11
+; CHECK-NEXT:    ldr x11, [sp] // 8-byte Folded Reload
+; CHECK-NEXT:    mov x16, #-1
+; CHECK-NEXT:    mov w9, #1
+; CHECK-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT:    ldr x8, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    adcs x17, x11, x16
+; CHECK-NEXT:    adcs x21, x25, x16
+; CHECK-NEXT:    adcs x23, x14, x16
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    and x18, x8, #0x1
+; CHECK-NEXT:  .LBB4_4: // %udiv-do-while
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr x8, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT:    extr x30, x29, x27, #63
+; CHECK-NEXT:    extr x13, x28, x29, #63
+; CHECK-NEXT:    bfi x24, x27, #1, #63
+; CHECK-NEXT:    extr x12, x26, x28, #63
+; CHECK-NEXT:    lsr x26, x26, #63
+; CHECK-NEXT:    cmp x8, x24
+; CHECK-NEXT:    extr x8, x1, x3, #63
+; CHECK-NEXT:    sbcs xzr, x17, x30
+; CHECK-NEXT:    extr x3, x3, x2, #63
+; CHECK-NEXT:    sbcs xzr, x21, x13
+; CHECK-NEXT:    extr x2, x2, x0, #63
+; CHECK-NEXT:    sbcs xzr, x23, x12
+; CHECK-NEXT:    mov x15, xzr
+; CHECK-NEXT:    sbc x26, x18, x26
+; CHECK-NEXT:    orr x0, x20, x0, lsl #1
+; CHECK-NEXT:    sbfx x9, x26, #0, #1
+; CHECK-NEXT:    orr x2, x22, x2
+; CHECK-NEXT:    and x26, x9, x10
+; CHECK-NEXT:    orr x3, x22, x3
+; CHECK-NEXT:    subs x27, x24, x26
+; CHECK-NEXT:    and x24, x9, x11
+; CHECK-NEXT:    sbcs x29, x30, x24
+; CHECK-NEXT:    and x24, x9, x25
+; CHECK-NEXT:    sbcs x28, x13, x24
+; CHECK-NEXT:    and x13, x9, x14
+; CHECK-NEXT:    sbcs x26, x12, x13
+; CHECK-NEXT:    lsr x12, x1, #63
+; CHECK-NEXT:    subs x4, x4, #1
+; CHECK-NEXT:    orr x1, x22, x8
+; CHECK-NEXT:    adcs x5, x5, x16
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    adcs x6, x6, x16
+; CHECK-NEXT:    orr w12, w22, w12
+; CHECK-NEXT:    adcs x7, x7, x16
+; CHECK-NEXT:    and x24, x12, #0x1
+; CHECK-NEXT:    adc x8, x19, x8
+; CHECK-NEXT:    orr x12, x5, x7
+; CHECK-NEXT:    and x19, x8, #0x1
+; CHECK-NEXT:    and x20, x9, #0x1
+; CHECK-NEXT:    orr x8, x4, x19
+; CHECK-NEXT:    mov x22, xzr
+; CHECK-NEXT:    orr x8, x8, x6
+; CHECK-NEXT:    orr x8, x8, x12
+; CHECK-NEXT:    cbnz x8, .LBB4_4
+; CHECK-NEXT:    b .LBB4_6
+; CHECK-NEXT:  .LBB4_5:
+; CHECK-NEXT:    mov x15, xzr
+; CHECK-NEXT:  .LBB4_6: // %udiv-loop-exit
+; CHECK-NEXT:    extr x8, x3, x2, #63
+; CHECK-NEXT:    extr x9, x2, x0, #63
+; CHECK-NEXT:    extr x10, x1, x3, #63
+; CHECK-NEXT:    lsr x11, x1, #63
+; CHECK-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    orr w11, w15, w11
+; CHECK-NEXT:    orr x0, x20, x0, lsl #1
+; CHECK-NEXT:    orr x3, x15, x9
+; CHECK-NEXT:    orr x1, x15, x8
+; CHECK-NEXT:    orr x2, x15, x10
+; CHECK-NEXT:    and x4, x11, #0x1
+; CHECK-NEXT:  .LBB4_7: // %udiv-end
+; CHECK-NEXT:    eor x8, x0, x27
+; CHECK-NEXT:    eor x9, x3, x27
+; CHECK-NEXT:    subs x0, x8, x27
+; CHECK-NEXT:    eor x8, x1, x27
+; CHECK-NEXT:    sbcs x1, x9, x27
+; CHECK-NEXT:    eor x9, x2, x27
+; CHECK-NEXT:    sbcs x2, x8, x27
+; CHECK-NEXT:    eor x8, x4, x28
+; CHECK-NEXT:    sbcs x3, x9, x27
+; CHECK-NEXT:    sbc x8, x8, x28
+; CHECK-NEXT:    ldp x20, x19, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    and x4, x8, #0x1
+; CHECK-NEXT:    ldp x22, x21, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #144
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: sdiv257:
+; CHECK-BE:       // %bb.0: // %_udiv-special-cases
+; CHECK-BE-NEXT:    sub sp, sp, #192
+; CHECK-BE-NEXT:    sbfx x14, x0, #0, #1
+; CHECK-BE-NEXT:    sbfx x13, x6, #0, #1
+; CHECK-BE-NEXT:    eor x10, x14, x4
+; CHECK-BE-NEXT:    eor x11, x14, x3
+; CHECK-BE-NEXT:    subs x15, x10, x14
+; CHECK-BE-NEXT:    eor x10, x14, x2
+; CHECK-BE-NEXT:    sbcs x16, x11, x14
+; CHECK-BE-NEXT:    eor x11, x14, x1
+; CHECK-BE-NEXT:    sbcs x18, x10, x14
+; CHECK-BE-NEXT:    eor x9, x14, x0
+; CHECK-BE-NEXT:    ldp x10, x8, [sp, #200]
+; CHECK-BE-NEXT:    sbcs x17, x11, x14
+; CHECK-BE-NEXT:    eor x12, x13, x6
+; CHECK-BE-NEXT:    ldr x11, [sp, #192]
+; CHECK-BE-NEXT:    sbc x1, x9, x14
+; CHECK-BE-NEXT:    clz x5, x15
+; CHECK-BE-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    eor x9, x13, x10
+; CHECK-BE-NEXT:    add x5, x5, #64
+; CHECK-BE-NEXT:    eor x8, x13, x8
+; CHECK-BE-NEXT:    eor x10, x13, x11
+; CHECK-BE-NEXT:    subs x8, x8, x13
+; CHECK-BE-NEXT:    eor x11, x13, x7
+; CHECK-BE-NEXT:    sbcs x9, x9, x13
+; CHECK-BE-NEXT:    clz x4, x8
+; CHECK-BE-NEXT:    sbcs x10, x10, x13
+; CHECK-BE-NEXT:    add x4, x4, #64
+; CHECK-BE-NEXT:    sbcs x11, x11, x13
+; CHECK-BE-NEXT:    clz x2, x10
+; CHECK-BE-NEXT:    sbc x12, x12, x13
+; CHECK-BE-NEXT:    add x2, x2, #64
+; CHECK-BE-NEXT:    clz x3, x11
+; CHECK-BE-NEXT:    cmp x11, #0
+; CHECK-BE-NEXT:    csel x2, x3, x2, ne
+; CHECK-BE-NEXT:    clz x3, x9
+; CHECK-BE-NEXT:    cmp x9, #0
+; CHECK-BE-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    csel x3, x3, x4, ne
+; CHECK-BE-NEXT:    orr x4, x10, x11
+; CHECK-BE-NEXT:    add x3, x3, #128
+; CHECK-BE-NEXT:    cmp x4, #0
+; CHECK-BE-NEXT:    clz x4, x18
+; CHECK-BE-NEXT:    csel x2, x2, x3, ne
+; CHECK-BE-NEXT:    add x3, x4, #64
+; CHECK-BE-NEXT:    clz x4, x17
+; CHECK-BE-NEXT:    cmp x17, #0
+; CHECK-BE-NEXT:    add x2, x2, #256
+; CHECK-BE-NEXT:    csel x3, x4, x3, ne
+; CHECK-BE-NEXT:    clz x4, x16
+; CHECK-BE-NEXT:    cmp x16, #0
+; CHECK-BE-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    csel x4, x4, x5, ne
+; CHECK-BE-NEXT:    orr x5, x18, x17
+; CHECK-BE-NEXT:    add x4, x4, #128
+; CHECK-BE-NEXT:    cmp x5, #0
+; CHECK-BE-NEXT:    csel x3, x3, x4, ne
+; CHECK-BE-NEXT:    ands x7, x1, #0x1
+; CHECK-BE-NEXT:    clz x4, x7
+; CHECK-BE-NEXT:    add x3, x3, #256
+; CHECK-BE-NEXT:    add x4, x4, #192
+; CHECK-BE-NEXT:    orr x5, x9, x11
+; CHECK-BE-NEXT:    csel x3, x4, x3, ne
+; CHECK-BE-NEXT:    ands x12, x12, #0x1
+; CHECK-BE-NEXT:    orr x4, x8, x12
+; CHECK-BE-NEXT:    str x7, [sp, #88] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    orr x4, x4, x10
+; CHECK-BE-NEXT:    stp x26, x25, [sp, #128] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    orr x4, x4, x5
+; CHECK-BE-NEXT:    clz x5, x12
+; CHECK-BE-NEXT:    add x5, x5, #192
+; CHECK-BE-NEXT:    stp x28, x27, [sp, #112] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    csel x2, x5, x2, ne
+; CHECK-BE-NEXT:    orr x5, x15, x7
+; CHECK-BE-NEXT:    orr x7, x16, x17
+; CHECK-BE-NEXT:    orr x5, x5, x18
+; CHECK-BE-NEXT:    cmp x4, #0
+; CHECK-BE-NEXT:    orr x4, x5, x7
+; CHECK-BE-NEXT:    ccmp x4, #0, #4, ne
+; CHECK-BE-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    eor w0, w6, w0
+; CHECK-BE-NEXT:    cset w20, eq
+; CHECK-BE-NEXT:    subs x2, x2, #255
+; CHECK-BE-NEXT:    ngcs x4, xzr
+; CHECK-BE-NEXT:    ngcs x7, xzr
+; CHECK-BE-NEXT:    ngcs x19, xzr
+; CHECK-BE-NEXT:    ngcs x22, xzr
+; CHECK-BE-NEXT:    ngcs x23, xzr
+; CHECK-BE-NEXT:    ngcs x24, xzr
+; CHECK-BE-NEXT:    ngc x25, xzr
+; CHECK-BE-NEXT:    subs x3, x3, #255
+; CHECK-BE-NEXT:    ngcs x5, xzr
+; CHECK-BE-NEXT:    ngcs x26, xzr
+; CHECK-BE-NEXT:    ngcs x27, xzr
+; CHECK-BE-NEXT:    ngcs x28, xzr
+; CHECK-BE-NEXT:    ngcs x29, xzr
+; CHECK-BE-NEXT:    ngcs x30, xzr
+; CHECK-BE-NEXT:    ngc x21, xzr
+; CHECK-BE-NEXT:    subs x3, x2, x3
+; CHECK-BE-NEXT:    sbcs x5, x4, x5
+; CHECK-BE-NEXT:    sbcs x7, x7, x26
+; CHECK-BE-NEXT:    sbcs x19, x19, x27
+; CHECK-BE-NEXT:    sbcs x4, x22, x28
+; CHECK-BE-NEXT:    orr x26, x7, x19
+; CHECK-BE-NEXT:    sbcs x2, x23, x29
+; CHECK-BE-NEXT:    sbcs x22, x24, x30
+; CHECK-BE-NEXT:    sbc x21, x25, x21
+; CHECK-BE-NEXT:    cmp x3, #256
+; CHECK-BE-NEXT:    cset w23, hi
+; CHECK-BE-NEXT:    cmp x5, #0
+; CHECK-BE-NEXT:    cset w24, ne
+; CHECK-BE-NEXT:    csel w23, w23, w24, eq
+; CHECK-BE-NEXT:    cmp x7, #0
+; CHECK-BE-NEXT:    cset w24, ne
+; CHECK-BE-NEXT:    cmp x19, #0
+; CHECK-BE-NEXT:    cset w25, ne
+; CHECK-BE-NEXT:    csel w24, w24, w25, eq
+; CHECK-BE-NEXT:    cmp x26, #0
+; CHECK-BE-NEXT:    csel w23, w23, w24, eq
+; CHECK-BE-NEXT:    cmp x4, #0
+; CHECK-BE-NEXT:    cset w24, ne
+; CHECK-BE-NEXT:    cmp x2, #0
+; CHECK-BE-NEXT:    cset w25, ne
+; CHECK-BE-NEXT:    orr x26, x22, x21
+; CHECK-BE-NEXT:    csel w24, w24, w25, eq
+; CHECK-BE-NEXT:    cmp x22, #0
+; CHECK-BE-NEXT:    cset w25, ne
+; CHECK-BE-NEXT:    cmp x21, #0
+; CHECK-BE-NEXT:    cset w27, ne
+; CHECK-BE-NEXT:    orr x22, x4, x22
+; CHECK-BE-NEXT:    orr x2, x2, x21
+; CHECK-BE-NEXT:    csel w25, w25, w27, eq
+; CHECK-BE-NEXT:    cmp x26, #0
+; CHECK-BE-NEXT:    orr x2, x22, x2
+; CHECK-BE-NEXT:    csel w21, w24, w25, eq
+; CHECK-BE-NEXT:    cmp x2, #0
+; CHECK-BE-NEXT:    eor x22, x13, x14
+; CHECK-BE-NEXT:    csel w14, w23, w21, eq
+; CHECK-BE-NEXT:    orr w20, w20, w14
+; CHECK-BE-NEXT:    and x27, x0, #0x1
+; CHECK-BE-NEXT:    cmp w20, #0
+; CHECK-BE-NEXT:    csel x6, xzr, x1, ne
+; CHECK-BE-NEXT:    csel x14, xzr, x15, ne
+; CHECK-BE-NEXT:    csel x2, xzr, x16, ne
+; CHECK-BE-NEXT:    csel x0, xzr, x18, ne
+; CHECK-BE-NEXT:    csel x1, xzr, x17, ne
+; CHECK-BE-NEXT:    and x6, x6, #0x1
+; CHECK-BE-NEXT:    tbnz w20, #0, .LBB4_7
+; CHECK-BE-NEXT:  // %bb.1: // %_udiv-special-cases
+; CHECK-BE-NEXT:    and x20, x4, #0x1
+; CHECK-BE-NEXT:    eor x4, x3, #0x100
+; CHECK-BE-NEXT:    orr x4, x4, x20
+; CHECK-BE-NEXT:    orr x21, x5, x19
+; CHECK-BE-NEXT:    orr x4, x4, x7
+; CHECK-BE-NEXT:    orr x4, x4, x21
+; CHECK-BE-NEXT:    cbz x4, .LBB4_7
+; CHECK-BE-NEXT:  // %bb.2: // %udiv-bb1
+; CHECK-BE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    mov w8, #256
+; CHECK-BE-NEXT:    stp x10, x9, [sp, #24] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    mov w9, #128
+; CHECK-BE-NEXT:    sub x6, x8, x3
+; CHECK-BE-NEXT:    sub x8, x9, x3
+; CHECK-BE-NEXT:    sub x9, x9, x6
+; CHECK-BE-NEXT:    stp x12, x11, [sp, #8] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    mvn w11, w9
+; CHECK-BE-NEXT:    lsl x23, x17, #1
+; CHECK-BE-NEXT:    mvn w10, w8
+; CHECK-BE-NEXT:    tst x9, #0x40
+; CHECK-BE-NEXT:    lsr x14, x18, x9
+; CHECK-BE-NEXT:    lsr x0, x17, x9
+; CHECK-BE-NEXT:    lsl x1, x23, x11
+; CHECK-BE-NEXT:    lsr x13, x16, x9
+; CHECK-BE-NEXT:    orr x14, x1, x14
+; CHECK-BE-NEXT:    lsl x4, x16, x8
+; CHECK-BE-NEXT:    csel x14, x0, x14, ne
+; CHECK-BE-NEXT:    lsr x0, x15, #1
+; CHECK-BE-NEXT:    lsr x10, x0, x10
+; CHECK-BE-NEXT:    lsl x12, x15, x8
+; CHECK-BE-NEXT:    csel x2, xzr, x13, ne
+; CHECK-BE-NEXT:    tst x8, #0x40
+; CHECK-BE-NEXT:    orr x10, x4, x10
+; CHECK-BE-NEXT:    csel x1, xzr, x12, ne
+; CHECK-BE-NEXT:    csel x10, x12, x10, ne
+; CHECK-BE-NEXT:    mvn w12, w3
+; CHECK-BE-NEXT:    lsl x25, x16, #1
+; CHECK-BE-NEXT:    lsl x8, x18, x8
+; CHECK-BE-NEXT:    lsr x4, x15, x9
+; CHECK-BE-NEXT:    lsl x11, x25, x11
+; CHECK-BE-NEXT:    csel x8, xzr, x8, ne
+; CHECK-BE-NEXT:    tst x9, #0x40
+; CHECK-BE-NEXT:    orr x9, x11, x4
+; CHECK-BE-NEXT:    lsr x11, x15, x3
+; CHECK-BE-NEXT:    lsl x12, x25, x12
+; CHECK-BE-NEXT:    csel x9, x13, x9, ne
+; CHECK-BE-NEXT:    orr x11, x12, x11
+; CHECK-BE-NEXT:    lsr x12, x16, x3
+; CHECK-BE-NEXT:    tst x3, #0x40
+; CHECK-BE-NEXT:    lsr x26, x18, #1
+; CHECK-BE-NEXT:    csel x11, x12, x11, ne
+; CHECK-BE-NEXT:    cmp x3, #128
+; CHECK-BE-NEXT:    orr w8, w11, w8
+; CHECK-BE-NEXT:    mvn w11, w6
+; CHECK-BE-NEXT:    csel w8, w8, w14, lo
+; CHECK-BE-NEXT:    cmp x3, #0
+; CHECK-BE-NEXT:    lsl x12, x17, x6
+; CHECK-BE-NEXT:    csel w8, w15, w8, eq
+; CHECK-BE-NEXT:    lsr x13, x26, x11
+; CHECK-BE-NEXT:    tst x6, #0x40
+; CHECK-BE-NEXT:    orr x12, x12, x13
+; CHECK-BE-NEXT:    lsl x13, x18, x6
+; CHECK-BE-NEXT:    ldr x21, [sp, #88] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    csel x12, x13, x12, ne
+; CHECK-BE-NEXT:    orr x12, x12, x2
+; CHECK-BE-NEXT:    lsl x2, x16, x6
+; CHECK-BE-NEXT:    lsr x11, x0, x11
+; CHECK-BE-NEXT:    csel x13, xzr, x13, ne
+; CHECK-BE-NEXT:    lsl x14, x21, x6
+; CHECK-BE-NEXT:    lsl x0, x15, x6
+; CHECK-BE-NEXT:    orr x11, x2, x11
+; CHECK-BE-NEXT:    orr x9, x13, x9
+; CHECK-BE-NEXT:    neg x13, x3
+; CHECK-BE-NEXT:    csel x14, xzr, x14, ne
+; CHECK-BE-NEXT:    csel x2, xzr, x0, ne
+; CHECK-BE-NEXT:    csel x11, x0, x11, ne
+; CHECK-BE-NEXT:    cmp x6, #128
+; CHECK-BE-NEXT:    stp x27, x22, [sp, #48] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    csel x10, x12, x10, lo
+; CHECK-BE-NEXT:    csel x9, x9, x1, lo
+; CHECK-BE-NEXT:    csel x11, x11, xzr, lo
+; CHECK-BE-NEXT:    csel x12, x2, xzr, lo
+; CHECK-BE-NEXT:    csel w14, w14, wzr, lo
+; CHECK-BE-NEXT:    cmp x6, #0
+; CHECK-BE-NEXT:    csel x9, x18, x9, eq
+; CHECK-BE-NEXT:    csel x10, x17, x10, eq
+; CHECK-BE-NEXT:    lsl x0, x15, x13
+; CHECK-BE-NEXT:    tst x13, #0x40
+; CHECK-BE-NEXT:    csel x0, xzr, x0, ne
+; CHECK-BE-NEXT:    cmp x13, #128
+; CHECK-BE-NEXT:    csel x13, x0, xzr, lo
+; CHECK-BE-NEXT:    cmp x6, #256
+; CHECK-BE-NEXT:    orr w8, w14, w8
+; CHECK-BE-NEXT:    csel x0, x10, xzr, lo
+; CHECK-BE-NEXT:    csel x2, x9, xzr, lo
+; CHECK-BE-NEXT:    csel x8, x8, x13, lo
+; CHECK-BE-NEXT:    csel x1, x12, xzr, lo
+; CHECK-BE-NEXT:    csel x4, x11, xzr, lo
+; CHECK-BE-NEXT:    cmp x6, #0
+; CHECK-BE-NEXT:    csel x24, x21, x8, eq
+; CHECK-BE-NEXT:    adds x3, x3, #1
+; CHECK-BE-NEXT:    adcs x6, x5, xzr
+; CHECK-BE-NEXT:    adcs x7, x7, xzr
+; CHECK-BE-NEXT:    adcs x19, x19, xzr
+; CHECK-BE-NEXT:    cinc x8, x20, hs
+; CHECK-BE-NEXT:    orr x9, x6, x19
+; CHECK-BE-NEXT:    and x20, x8, #0x1
+; CHECK-BE-NEXT:    orr x8, x3, x20
+; CHECK-BE-NEXT:    orr x8, x8, x7
+; CHECK-BE-NEXT:    orr x8, x8, x9
+; CHECK-BE-NEXT:    cbz x8, .LBB4_5
+; CHECK-BE-NEXT:  // %bb.3: // %udiv-preheader
+; CHECK-BE-NEXT:    mov w8, #128
+; CHECK-BE-NEXT:    mvn w11, w3
+; CHECK-BE-NEXT:    sub x9, x8, x3
+; CHECK-BE-NEXT:    and x14, x21, #0x1
+; CHECK-BE-NEXT:    mvn w10, w9
+; CHECK-BE-NEXT:    tst x9, #0x40
+; CHECK-BE-NEXT:    lsr x27, x15, x3
+; CHECK-BE-NEXT:    mov x22, xzr
+; CHECK-BE-NEXT:    lsl x13, x17, x9
+; CHECK-BE-NEXT:    lsl x12, x14, x9
+; CHECK-BE-NEXT:    lsr x10, x26, x10
+; CHECK-BE-NEXT:    lsl x9, x18, x9
+; CHECK-BE-NEXT:    orr x10, x13, x10
+; CHECK-BE-NEXT:    lsr x13, x18, x3
+; CHECK-BE-NEXT:    lsl x26, x23, x11
+; CHECK-BE-NEXT:    lsl x11, x25, x11
+; CHECK-BE-NEXT:    csel x21, x12, xzr, ne
+; CHECK-BE-NEXT:    csel x12, xzr, x12, ne
+; CHECK-BE-NEXT:    csel x10, x9, x10, ne
+; CHECK-BE-NEXT:    csel x9, xzr, x9, ne
+; CHECK-BE-NEXT:    orr x13, x26, x13
+; CHECK-BE-NEXT:    lsr x26, x17, x3
+; CHECK-BE-NEXT:    tst x3, #0x40
+; CHECK-BE-NEXT:    orr x11, x11, x27
+; CHECK-BE-NEXT:    lsr x25, x16, x3
+; CHECK-BE-NEXT:    lsr x27, x14, x3
+; CHECK-BE-NEXT:    csel x13, x26, x13, ne
+; CHECK-BE-NEXT:    csel x11, x25, x11, ne
+; CHECK-BE-NEXT:    csel x25, xzr, x25, ne
+; CHECK-BE-NEXT:    csel x27, xzr, x27, ne
+; CHECK-BE-NEXT:    csel x26, xzr, x26, ne
+; CHECK-BE-NEXT:    subs x28, x3, #128
+; CHECK-BE-NEXT:    mvn w29, w28
+; CHECK-BE-NEXT:    orr x9, x11, x9
+; CHECK-BE-NEXT:    mov w11, #256
+; CHECK-BE-NEXT:    csel x26, x26, xzr, lo
+; CHECK-BE-NEXT:    sub x11, x11, x3
+; CHECK-BE-NEXT:    csel x27, x27, xzr, lo
+; CHECK-BE-NEXT:    lsl x23, x23, x29
+; CHECK-BE-NEXT:    lsr x29, x18, x28
+; CHECK-BE-NEXT:    orr x23, x23, x29
+; CHECK-BE-NEXT:    tst x28, #0x40
+; CHECK-BE-NEXT:    lsr x28, x17, x28
+; CHECK-BE-NEXT:    sub x8, x8, x11
+; CHECK-BE-NEXT:    csel x23, x28, x23, ne
+; CHECK-BE-NEXT:    csel x28, xzr, x28, ne
+; CHECK-BE-NEXT:    cmp x3, #128
+; CHECK-BE-NEXT:    orr x10, x25, x10
+; CHECK-BE-NEXT:    csel x9, x9, x23, lo
+; CHECK-BE-NEXT:    csel x13, x13, xzr, lo
+; CHECK-BE-NEXT:    csel x10, x10, x28, lo
+; CHECK-BE-NEXT:    lsl x23, x14, x11
+; CHECK-BE-NEXT:    tst x11, #0x40
+; CHECK-BE-NEXT:    lsr x28, x14, x8
+; CHECK-BE-NEXT:    csel x25, x23, xzr, ne
+; CHECK-BE-NEXT:    csel x23, xzr, x23, ne
+; CHECK-BE-NEXT:    tst x8, #0x40
+; CHECK-BE-NEXT:    mov x5, xzr
+; CHECK-BE-NEXT:    csel x8, xzr, x28, ne
+; CHECK-BE-NEXT:    cmp x3, #0
+; CHECK-BE-NEXT:    csel x9, x15, x9, eq
+; CHECK-BE-NEXT:    csel x10, x16, x10, eq
+; CHECK-BE-NEXT:    csel x30, x14, x27, eq
+; CHECK-BE-NEXT:    cmp x11, #128
+; CHECK-BE-NEXT:    csel x8, x8, x12, lo
+; CHECK-BE-NEXT:    csel x12, x23, xzr, lo
+; CHECK-BE-NEXT:    csel x23, x25, xzr, lo
+; CHECK-BE-NEXT:    csel x21, xzr, x21, lo
+; CHECK-BE-NEXT:    cmp x11, #0
+; CHECK-BE-NEXT:    orr x10, x10, x23
+; CHECK-BE-NEXT:    csel x8, xzr, x8, eq
+; CHECK-BE-NEXT:    csel x11, xzr, x21, eq
+; CHECK-BE-NEXT:    subs x21, x3, #256
+; CHECK-BE-NEXT:    orr x8, x13, x8
+; CHECK-BE-NEXT:    orr x11, x26, x11
+; CHECK-BE-NEXT:    csel x8, x8, xzr, lo
+; CHECK-BE-NEXT:    csel x11, x11, xzr, lo
+; CHECK-BE-NEXT:    tst x21, #0x40
+; CHECK-BE-NEXT:    lsr x13, x14, x21
+; CHECK-BE-NEXT:    orr x9, x9, x12
+; CHECK-BE-NEXT:    csel x13, xzr, x13, ne
+; CHECK-BE-NEXT:    cmp x21, #128
+; CHECK-BE-NEXT:    csel x13, x13, xzr, lo
+; CHECK-BE-NEXT:    cmp x21, #0
+; CHECK-BE-NEXT:    csel x13, x14, x13, eq
+; CHECK-BE-NEXT:    cmp x3, #256
+; CHECK-BE-NEXT:    csel x10, x10, xzr, lo
+; CHECK-BE-NEXT:    csel x9, x9, x13, lo
+; CHECK-BE-NEXT:    cmp x3, #0
+; CHECK-BE-NEXT:    ldr x23, [sp, #8] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    csel x27, x15, x9, eq
+; CHECK-BE-NEXT:    csel x28, x18, x8, eq
+; CHECK-BE-NEXT:    ldp x15, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    csel x29, x16, x10, eq
+; CHECK-BE-NEXT:    csel x26, x17, x11, eq
+; CHECK-BE-NEXT:    cmp x3, #256
+; CHECK-BE-NEXT:    mov x17, #-1
+; CHECK-BE-NEXT:    ldp x21, x18, [sp, #16] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    csel x30, x30, xzr, lo
+; CHECK-BE-NEXT:    and x14, x24, #0x1
+; CHECK-BE-NEXT:    subs x9, x25, #1
+; CHECK-BE-NEXT:    adcs x8, x15, x17
+; CHECK-BE-NEXT:    stp x8, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    adcs x9, x18, x17
+; CHECK-BE-NEXT:    adcs x8, x21, x17
+; CHECK-BE-NEXT:    stp x8, x9, [sp, #64] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    mov w8, #1
+; CHECK-BE-NEXT:    adc x8, x23, x8
+; CHECK-BE-NEXT:    and x24, x8, #0x1
+; CHECK-BE-NEXT:  .LBB4_4: // %udiv-do-while
+; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT:    ldr x9, [sp, #88] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    bfi x14, x27, #1, #63
+; CHECK-BE-NEXT:    extr x13, x29, x27, #63
+; CHECK-BE-NEXT:    extr x8, x28, x29, #63
+; CHECK-BE-NEXT:    extr x12, x26, x28, #63
+; CHECK-BE-NEXT:    extr x30, x30, x26, #63
+; CHECK-BE-NEXT:    cmp x9, x14
+; CHECK-BE-NEXT:    extr x26, x0, x2, #63
+; CHECK-BE-NEXT:    ldp x10, x9, [sp, #72] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    extr x2, x2, x4, #63
+; CHECK-BE-NEXT:    mov x16, xzr
+; CHECK-BE-NEXT:    orr x2, x22, x2
+; CHECK-BE-NEXT:    sbcs xzr, x9, x13
+; CHECK-BE-NEXT:    extr x9, x4, x1, #63
+; CHECK-BE-NEXT:    sbcs xzr, x10, x8
+; CHECK-BE-NEXT:    ldr x10, [sp, #64] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    orr x1, x5, x1, lsl #1
+; CHECK-BE-NEXT:    sbcs xzr, x10, x12
+; CHECK-BE-NEXT:    lsr x10, x0, #63
+; CHECK-BE-NEXT:    sbc x4, x24, x30
+; CHECK-BE-NEXT:    orr x0, x22, x26
+; CHECK-BE-NEXT:    sbfx x11, x4, #0, #1
+; CHECK-BE-NEXT:    and x4, x11, x25
+; CHECK-BE-NEXT:    and x26, x11, x15
+; CHECK-BE-NEXT:    subs x27, x14, x4
+; CHECK-BE-NEXT:    and x14, x11, x18
+; CHECK-BE-NEXT:    sbcs x29, x13, x26
+; CHECK-BE-NEXT:    and x13, x11, x21
+; CHECK-BE-NEXT:    sbcs x28, x8, x14
+; CHECK-BE-NEXT:    and x8, x11, x23
+; CHECK-BE-NEXT:    sbcs x26, x12, x13
+; CHECK-BE-NEXT:    orr x4, x22, x9
+; CHECK-BE-NEXT:    sbc x8, x30, x8
+; CHECK-BE-NEXT:    subs x3, x3, #1
+; CHECK-BE-NEXT:    adcs x6, x6, x17
+; CHECK-BE-NEXT:    orr w9, w22, w10
+; CHECK-BE-NEXT:    adcs x7, x7, x17
+; CHECK-BE-NEXT:    mov w10, #1
+; CHECK-BE-NEXT:    adcs x19, x19, x17
+; CHECK-BE-NEXT:    and x14, x9, #0x1
+; CHECK-BE-NEXT:    adc x10, x20, x10
+; CHECK-BE-NEXT:    and x5, x11, #0x1
+; CHECK-BE-NEXT:    and x20, x10, #0x1
+; CHECK-BE-NEXT:    orr x10, x6, x19
+; CHECK-BE-NEXT:    orr x9, x3, x20
+; CHECK-BE-NEXT:    and x30, x8, #0x1
+; CHECK-BE-NEXT:    orr x9, x9, x7
+; CHECK-BE-NEXT:    mov x22, xzr
+; CHECK-BE-NEXT:    orr x8, x9, x10
+; CHECK-BE-NEXT:    cbnz x8, .LBB4_4
+; CHECK-BE-NEXT:    b .LBB4_6
+; CHECK-BE-NEXT:  .LBB4_5:
+; CHECK-BE-NEXT:    mov x16, xzr
+; CHECK-BE-NEXT:    mov x5, xzr
+; CHECK-BE-NEXT:  .LBB4_6: // %udiv-loop-exit
+; CHECK-BE-NEXT:    extr x8, x2, x4, #63
+; CHECK-BE-NEXT:    extr x9, x0, x2, #63
+; CHECK-BE-NEXT:    extr x10, x4, x1, #63
+; CHECK-BE-NEXT:    lsr x11, x0, #63
+; CHECK-BE-NEXT:    ldp x27, x22, [sp, #48] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    orr w11, w16, w11
+; CHECK-BE-NEXT:    orr x14, x5, x1, lsl #1
+; CHECK-BE-NEXT:    orr x1, x16, x9
+; CHECK-BE-NEXT:    orr x0, x16, x8
+; CHECK-BE-NEXT:    orr x2, x16, x10
+; CHECK-BE-NEXT:    and x6, x11, #0x1
+; CHECK-BE-NEXT:  .LBB4_7: // %udiv-end
+; CHECK-BE-NEXT:    eor x8, x14, x22
+; CHECK-BE-NEXT:    eor x9, x2, x22
+; CHECK-BE-NEXT:    subs x4, x8, x22
+; CHECK-BE-NEXT:    eor x8, x0, x22
+; CHECK-BE-NEXT:    sbcs x3, x9, x22
+; CHECK-BE-NEXT:    eor x9, x1, x22
+; CHECK-BE-NEXT:    sbcs x2, x8, x22
+; CHECK-BE-NEXT:    eor x8, x6, x27
+; CHECK-BE-NEXT:    sbcs x1, x9, x22
+; CHECK-BE-NEXT:    sbc x8, x8, x27
+; CHECK-BE-NEXT:    ldp x20, x19, [sp, #176] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    and x0, x8, #0x1
+; CHECK-BE-NEXT:    ldp x22, x21, [sp, #160] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    ldp x24, x23, [sp, #144] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    ldp x26, x25, [sp, #128] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    ldp x28, x27, [sp, #112] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    add sp, sp, #192
+; CHECK-BE-NEXT:    ret
+  %res = sdiv i257 %a, %b
+  ret i257 %res
+}
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -5,6 +5,7 @@
 ; CHECK:       ModulePass Manager
 ; CHECK-NEXT:    Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:    FunctionPass Manager
+; CHECK-NEXT:      Expand large div/rem
 ; CHECK-NEXT:      Expand Atomic instructions
 ; CHECK-NEXT:      Simplify the CFG
 ; CHECK-NEXT:      Dominator Tree Construction
diff --git a/llvm/test/CodeGen/ARM/udivmodei5.ll b/llvm/test/CodeGen/ARM/udivmodei5.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/udivmodei5.ll
@@ -0,0 +1,2376 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm-eabi < %s | FileCheck %s
+
+define void @udiv129(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: udiv129:
+; CHECK:       @ %bb.0: @ %_udiv-special-cases
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #92
+; CHECK-NEXT:    sub sp, sp, #92
+; CHECK-NEXT:    ldr r10, [r0, #12]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    ldr r8, [r0]
+; CHECK-NEXT:    ldr r5, [r0, #4]
+; CHECK-NEXT:    ldr r6, [r0, #8]
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    str r1, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    str r8, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    str r5, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r9, r6
+; CHECK-NEXT:    str r6, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    addeq r7, r0, #32
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    addeq r6, r0, #32
+; CHECK-NEXT:    orrs r0, r9, r10
+; CHECK-NEXT:    ldrb r9, [r4, #16]
+; CHECK-NEXT:    addeq r7, r6, #64
+; CHECK-NEXT:    add r5, r7, #128
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    str r10, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    addne r5, r0, #96
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    subs r1, r5, #127
+; CHECK-NEXT:    sbcs r2, r0, #0
+; CHECK-NEXT:    sbcs r3, r0, #0
+; CHECK-NEXT:    sbcs r4, r0, #0
+; CHECK-NEXT:    sbcs r5, r0, #0
+; CHECK-NEXT:    sbcs r6, r0, #0
+; CHECK-NEXT:    sbcs r8, r0, #0
+; CHECK-NEXT:    sbc r12, r0, #0
+; CHECK-NEXT:    rsbs r7, r1, #127
+; CHECK-NEXT:    rscs r2, r2, #0
+; CHECK-NEXT:    rscs lr, r3, #0
+; CHECK-NEXT:    str lr, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    rscs r3, r4, #0
+; CHECK-NEXT:    str r3, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    rscs r1, r5, #0
+; CHECK-NEXT:    rscs r11, r6, #0
+; CHECK-NEXT:    rscs r4, r8, #0
+; CHECK-NEXT:    rsc r5, r12, #0
+; CHECK-NEXT:    rsbs r6, r7, #128
+; CHECK-NEXT:    rscs r6, r2, #0
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    rscs r6, lr, #0
+; CHECK-NEXT:    mov lr, r1
+; CHECK-NEXT:    rscs r6, r3, #0
+; CHECK-NEXT:    ldr r2, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    rscs r6, r1, #0
+; CHECK-NEXT:    rscs r1, r11, #0
+; CHECK-NEXT:    ldr r6, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    rscs r1, r4, #0
+; CHECK-NEXT:    rscs r1, r5, #0
+; CHECK-NEXT:    ldr r5, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    orr r1, r2, r9
+; CHECK-NEXT:    orr r4, r6, r10
+; CHECK-NEXT:    orr r1, r1, r5
+; CHECK-NEXT:    movlo r0, #1
+; CHECK-NEXT:    orr r1, r1, r4
+; CHECK-NEXT:    rsbs r4, r1, #0
+; CHECK-NEXT:    adc r1, r1, r4
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    orrs r3, r1, r0
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    bne .LBB0_6
+; CHECK-NEXT:  @ %bb.1: @ %_udiv-special-cases
+; CHECK-NEXT:    mov r10, r2
+; CHECK-NEXT:    and r8, lr, #1
+; CHECK-NEXT:    eor r2, r7, #128
+; CHECK-NEXT:    ldr lr, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    ldr r3, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    orr r2, r2, r8
+; CHECK-NEXT:    orr r11, r12, lr
+; CHECK-NEXT:    orr r2, r2, r3
+; CHECK-NEXT:    orrs r2, r2, r11
+; CHECK-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    beq .LBB0_9
+; CHECK-NEXT:  @ %bb.2: @ %udiv-bb1
+; CHECK-NEXT:    adds r4, r7, #1
+; CHECK-NEXT:    mov r11, r9
+; CHECK-NEXT:    adcs r0, r12, #0
+; CHECK-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    adcs r2, r3, #0
+; CHECK-NEXT:    ldr r3, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    adcs r1, lr, #0
+; CHECK-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    adc r1, r8, #0
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    str r1, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r4, r1
+; CHECK-NEXT:    rsb r9, r7, #32
+; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    ldr r12, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    str r0, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    lsr r0, r10, r7
+; CHECK-NEXT:    subs r1, r7, #32
+; CHECK-NEXT:    orr r0, r0, r3, lsl r9
+; CHECK-NEXT:    str r4, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    lsrpl r0, r3, r1
+; CHECK-NEXT:    rsb r1, r7, #64
+; CHECK-NEXT:    rsb r4, r7, #128
+; CHECK-NEXT:    mov lr, r10
+; CHECK-NEXT:    rsb r5, r4, #64
+; CHECK-NEXT:    ldr r10, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    rsb r6, r5, #32
+; CHECK-NEXT:    lsl r1, r12, r1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    str r2, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    lsr r2, r12, r5
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    rsbs r8, r4, #32
+; CHECK-NEXT:    orr r2, r2, r10, lsl r6
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    lsrpl r2, r10, r8
+; CHECK-NEXT:    cmp r7, #64
+; CHECK-NEXT:    orrlo r2, r0, r1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    moveq r2, lr
+; CHECK-NEXT:    lsl r0, r11, r4
+; CHECK-NEXT:    rsbs r6, r7, #96
+; CHECK-NEXT:    str r11, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    orrlo r2, r2, r0
+; CHECK-NEXT:    mvn r0, #31
+; CHECK-NEXT:    subs r0, r0, r7
+; CHECK-NEXT:    rsb r0, r7, #0
+; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    mov r3, #0
+; CHECK-NEXT:    lsl r1, lr, r0
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r0, #64
+; CHECK-NEXT:    movhs r1, r3
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    movlo r1, r2
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r1, r11
+; CHECK-NEXT:    ldr r11, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    lsr r0, r12, r8
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    rsb r3, r11, #32
+; CHECK-NEXT:    orr r0, r0, r10, lsl r4
+; CHECK-NEXT:    lslpl r0, r12, r6
+; CHECK-NEXT:    lsr r2, r7, r5
+; CHECK-NEXT:    lsr r3, lr, r3
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    orr r3, r3, r7, lsl r11
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    lslpl r3, lr, r9
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    orrlo r3, r0, r2
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    ldr r2, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    moveq r3, r10
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movhs r3, r0
+; CHECK-NEXT:    lsr r0, lr, r5
+; CHECK-NEXT:    orr r0, r0, r7, lsl r2
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    lsrpl r0, r7, r8
+; CHECK-NEXT:    lsl r2, r12, r4
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    lsl r10, lr, r11
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    movpl r10, #0
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    orrlo r10, r2, r0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r10, r12
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    lsl r5, lr, r4
+; CHECK-NEXT:    movhs r10, r0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    movpl r5, #0
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    movhs r5, r0
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    mov r9, r7
+; CHECK-NEXT:    movhs r5, r0
+; CHECK-NEXT:    lsr r0, lr, r8
+; CHECK-NEXT:    orr r7, r0, r7, lsl r4
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    lslpl r7, lr, r6
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    movhs r7, r6
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    movhs r7, r6
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    beq .LBB0_7
+; CHECK-NEXT:  @ %bb.3: @ %udiv-preheader
+; CHECK-NEXT:    ldr r4, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    and r0, r1, #1
+; CHECK-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    rsb r1, r4, #32
+; CHECK-NEXT:    str r1, [sp] @ 4-byte Spill
+; CHECK-NEXT:    lsr r0, lr, r4
+; CHECK-NEXT:    ldr r8, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    orr r11, r0, r9, lsl r1
+; CHECK-NEXT:    subs r0, r4, #32
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    rsb r6, r4, #96
+; CHECK-NEXT:    lsrpl r11, r9, r0
+; CHECK-NEXT:    rsb r0, r4, #64
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    sub r1, r4, #64
+; CHECK-NEXT:    lsl r2, r12, r0
+; CHECK-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    lsr r1, r12, r1
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    subs r0, r4, #96
+; CHECK-NEXT:    orr r1, r1, r8, lsl r6
+; CHECK-NEXT:    mov r9, r12
+; CHECK-NEXT:    lsrpl r1, r8, r0
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    orrlo r1, r11, r2
+; CHECK-NEXT:    ldr r11, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    rsb r12, r4, #128
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    moveq r1, lr
+; CHECK-NEXT:    str r6, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    lsl r0, r11, r12
+; CHECK-NEXT:    sub r6, r4, #128
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    orrlo r1, r1, r0
+; CHECK-NEXT:    subs r2, r4, #160
+; CHECK-NEXT:    lsr r0, r11, r6
+; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    cmp r6, #64
+; CHECK-NEXT:    str r7, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    movhs r0, r2
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r0, r11
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    movlo r0, r1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT:    moveq r0, lr
+; CHECK-NEXT:    rsb lr, r7, #32
+; CHECK-NEXT:    str r0, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    lsr r0, r11, lr
+; CHECK-NEXT:    str r3, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    lslpl r0, r11, r2
+; CHECK-NEXT:    rsbs r8, r12, #32
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    str r5, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movhs r1, r0
+; CHECK-NEXT:    cmp r12, #0
+; CHECK-NEXT:    moveq r1, r12
+; CHECK-NEXT:    lsr r0, r3, r4
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    orrlo r1, r1, r0
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movhs r1, r0
+; CHECK-NEXT:    rsb r0, r12, #64
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r1, r3
+; CHECK-NEXT:    lsr r0, r11, r0
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    str r1, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    lsl r1, r11, r7
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    movlo r1, r0
+; CHECK-NEXT:    lsr r0, r9, r4
+; CHECK-NEXT:    cmp r12, #0
+; CHECK-NEXT:    orr r0, r0, r3, lsl r2
+; CHECK-NEXT:    moveq r1, r12
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    lsrpl r0, r3, r5
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    orrlo r1, r1, r0
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movhs r1, r0
+; CHECK-NEXT:    lsr r0, r9, lr
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    orr r0, r0, r3, lsl r7
+; CHECK-NEXT:    moveq r1, r9
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    lslpl r0, r9, r2
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    ldr r5, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    ldr r9, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    lsr r3, r3, r5
+; CHECK-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    lsr r2, r7, r4
+; CHECK-NEXT:    ldr lr, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    movpl r3, #0
+; CHECK-NEXT:    cmp r4, #64
+; CHECK-NEXT:    orrlo r3, r2, r0
+; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    lsr r0, r11, r8
+; CHECK-NEXT:    moveq r3, r7
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    lslpl r0, r11, r2
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    orrlo r3, r3, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    mov r8, r10
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    str r0, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    cmp r6, #64
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movhs r2, r0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r2, r6
+; CHECK-NEXT:    cmp r4, #128
+; CHECK-NEXT:    movlo r2, r3
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    ldr r12, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    moveq r2, r7
+; CHECK-NEXT:    ldr r10, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    str r2, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:  .LBB0_4: @ %udiv-do-while
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    subs r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r9, r9, #0
+; CHECK-NEXT:    sbcs r10, r10, #0
+; CHECK-NEXT:    sbcs r4, r4, #0
+; CHECK-NEXT:    adc r3, lr, #1
+; CHECK-NEXT:    orr r2, r9, r4
+; CHECK-NEXT:    and lr, r3, #1
+; CHECK-NEXT:    orr r3, r0, lr
+; CHECK-NEXT:    orr r3, r3, r10
+; CHECK-NEXT:    orr r0, r3, r2
+; CHECK-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    lsl r3, r6, #1
+; CHECK-NEXT:    orr r3, r3, r1, lsr #31
+; CHECK-NEXT:    lsl r1, r1, #1
+; CHECK-NEXT:    orr r1, r1, r0, lsr #31
+; CHECK-NEXT:    lsl r7, r0, #1
+; CHECK-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    orr r5, r7, r0, lsr #31
+; CHECK-NEXT:    and r7, r12, #1
+; CHECK-NEXT:    orr r7, r7, r0, lsl #1
+; CHECK-NEXT:    ldr r12, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    rsbs r0, r7, #2
+; CHECK-NEXT:    rscs r0, r5, #0
+; CHECK-NEXT:    rscs r0, r1, #0
+; CHECK-NEXT:    rscs r0, r3, #0
+; CHECK-NEXT:    sbc r0, r12, r6, lsr #31
+; CHECK-NEXT:    ldr r6, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    rsb r0, r0, #0
+; CHECK-NEXT:    and r2, r0, #1
+; CHECK-NEXT:    and r0, r0, #3
+; CHECK-NEXT:    subs r0, r7, r0
+; CHECK-NEXT:    str r0, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r5, #0
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r1, r1, #0
+; CHECK-NEXT:    orr r5, r11, r6, lsl #1
+; CHECK-NEXT:    sbcs r0, r3, #0
+; CHECK-NEXT:    ldr r3, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    str r5, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    lsl r0, r3, #1
+; CHECK-NEXT:    orr r0, r0, r6, lsr #31
+; CHECK-NEXT:    ldr r6, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    orr r7, r12, r0
+; CHECK-NEXT:    lsl r0, r8, #1
+; CHECK-NEXT:    orr r0, r0, r3, lsr #31
+; CHECK-NEXT:    str r7, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    orr r3, r12, r0
+; CHECK-NEXT:    lsl r0, r6, #1
+; CHECK-NEXT:    orr r0, r0, r8, lsr #31
+; CHECK-NEXT:    orr r11, r12, r0
+; CHECK-NEXT:    orr r0, r12, r6, lsr #31
+; CHECK-NEXT:    and r12, r0, #1
+; CHECK-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    mov r8, r3
+; CHECK-NEXT:    str r11, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    bne .LBB0_4
+; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    ldr r2, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_6:
+; CHECK-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    b .LBB0_9
+; CHECK-NEXT:  .LBB0_7:
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    mov r12, #0
+; CHECK-NEXT:    str r0, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:  .LBB0_8: @ %udiv-loop-exit
+; CHECK-NEXT:    lsl r1, r7, #1
+; CHECK-NEXT:    orr r4, r6, r5, lsl #1
+; CHECK-NEXT:    ldr r6, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    orr r1, r1, r5, lsr #31
+; CHECK-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    orr r6, r6, r1
+; CHECK-NEXT:    lsl r1, r10, #1
+; CHECK-NEXT:    orr r1, r1, r7, lsr #31
+; CHECK-NEXT:    orr r5, r0, r1
+; CHECK-NEXT:    lsl r1, r3, #1
+; CHECK-NEXT:    orr r1, r1, r10, lsr #31
+; CHECK-NEXT:    orr r1, r0, r1
+; CHECK-NEXT:    orr r0, r12, r3, lsr #31
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:  .LBB0_9: @ %udiv-end
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    stm r2, {r4, r6}
+; CHECK-NEXT:    str r5, [r2, #8]
+; CHECK-NEXT:    str r1, [r2, #12]
+; CHECK-NEXT:    strb r0, [r2, #16]
+; CHECK-NEXT:    add sp, sp, #92
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    mov pc, lr
+  %a = load i129, i129* %ptr
+  %res = udiv i129 %a, 3
+  store i129 %res, i129* %out
+  ret void
+}
+
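+; Variable-divisor remainder: the same shift/subtract loop (udiv-do-while)
+; computes the quotient, and the udiv-end block reconstructs the remainder
+; as a - (a/b)*b via the umull/umlal multiply-subtract sequence, so no
+; runtime libcall is needed for the 129-bit urem.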
+define i129 @urem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: urem129:
+; CHECK:       @ %bb.0: @ %_udiv-special-cases
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #84
+; CHECK-NEXT:    sub sp, sp, #84
+; CHECK-NEXT:    ldr r1, [sp, #128]
+; CHECK-NEXT:    mov r9, r2
+; CHECK-NEXT:    ldr r7, [sp, #124]
+; CHECK-NEXT:    mov r4, r3
+; CHECK-NEXT:    and r6, r1, #1
+; CHECK-NEXT:    ldr r5, [sp, #120]
+; CHECK-NEXT:    orr r1, r2, r6
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    orr r0, r3, r7
+; CHECK-NEXT:    orr r1, r1, r5
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    rsbs r1, r0, #0
+; CHECK-NEXT:    ldr r2, [sp, #148]
+; CHECK-NEXT:    adc r0, r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #140]
+; CHECK-NEXT:    ldr r3, [sp, #144]
+; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    ldr r2, [sp, #152]
+; CHECK-NEXT:    and r10, r2, #1
+; CHECK-NEXT:    ldr r2, [sp, #136]
+; CHECK-NEXT:    orr r2, r2, r10
+; CHECK-NEXT:    orr r2, r2, r3
+; CHECK-NEXT:    orr r1, r2, r1
+; CHECK-NEXT:    rsbs r2, r1, #0
+; CHECK-NEXT:    adc r1, r1, r2
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    str r0, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    str r4, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    addeq r11, r0, #32
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    addeq r8, r0, #32
+; CHECK-NEXT:    orrs r0, r5, r7
+; CHECK-NEXT:    addeq r11, r8, #64
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    add r4, r11, #128
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    addne r4, r0, #96
+; CHECK-NEXT:    ldr r5, [sp, #148]
+; CHECK-NEXT:    subs r0, r4, #127
+; CHECK-NEXT:    str r0, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r7, #0
+; CHECK-NEXT:    str r0, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r7, #0
+; CHECK-NEXT:    str r0, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r11, r7, #0
+; CHECK-NEXT:    str r6, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r7, #0
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r7, #0
+; CHECK-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r7, #0
+; CHECK-NEXT:    str r0, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    sbc r0, r7, #0
+; CHECK-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    ldr r8, [sp, #144]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    ldr r9, [sp, #140]
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    addeq r4, r0, #32
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    ldr r0, [sp, #136]
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    addeq r6, r0, #32
+; CHECK-NEXT:    orrs r0, r8, r5
+; CHECK-NEXT:    addeq r4, r6, #64
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    add r4, r4, #128
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    ldr r8, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    addne r4, r0, #96
+; CHECK-NEXT:    subs r0, r4, #127
+; CHECK-NEXT:    sbcs r1, r7, #0
+; CHECK-NEXT:    sbcs r2, r7, #0
+; CHECK-NEXT:    sbcs r3, r7, #0
+; CHECK-NEXT:    sbcs r4, r7, #0
+; CHECK-NEXT:    sbcs r5, r7, #0
+; CHECK-NEXT:    sbcs r6, r7, #0
+; CHECK-NEXT:    sbc lr, r7, #0
+; CHECK-NEXT:    ldr r7, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    subs r7, r0, r7
+; CHECK-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r10, r1, r0
+; CHECK-NEXT:    ldr r0, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r9, r2, r0
+; CHECK-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r12, r3, r11
+; CHECK-NEXT:    ldr r3, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r2, r4, r0
+; CHECK-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    ldr r11, [sp, #120]
+; CHECK-NEXT:    sbcs r0, r5, r0
+; CHECK-NEXT:    ldr r4, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r1, r6, r1
+; CHECK-NEXT:    sbc r3, lr, r3
+; CHECK-NEXT:    rsbs r6, r7, #128
+; CHECK-NEXT:    rscs r6, r10, #0
+; CHECK-NEXT:    ldr lr, [sp, #124]
+; CHECK-NEXT:    rscs r6, r9, #0
+; CHECK-NEXT:    rscs r6, r12, #0
+; CHECK-NEXT:    rscs r6, r2, #0
+; CHECK-NEXT:    rscs r0, r0, #0
+; CHECK-NEXT:    ldr r6, [sp, #128]
+; CHECK-NEXT:    rscs r0, r1, #0
+; CHECK-NEXT:    ldr r1, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    rscs r0, r3, #0
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlo r0, #1
+; CHECK-NEXT:    orrs r5, r1, r0
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    and r0, r6, #1
+; CHECK-NEXT:    movne lr, #0
+; CHECK-NEXT:    movne r11, #0
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    movne r8, #0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    bne .LBB1_8
+; CHECK-NEXT:  @ %bb.1: @ %_udiv-special-cases
+; CHECK-NEXT:    and r2, r2, #1
+; CHECK-NEXT:    eor r6, r7, #128
+; CHECK-NEXT:    orr r6, r6, r2
+; CHECK-NEXT:    orr r5, r10, r12
+; CHECK-NEXT:    orr r6, r6, r9
+; CHECK-NEXT:    orrs r5, r6, r5
+; CHECK-NEXT:    beq .LBB1_8
+; CHECK-NEXT:  @ %bb.2: @ %udiv-bb1
+; CHECK-NEXT:    adds r3, r7, #1
+; CHECK-NEXT:    str r3, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    adcs r0, r10, #0
+; CHECK-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    adcs r6, r9, #0
+; CHECK-NEXT:    ldr lr, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    adcs r1, r12, #0
+; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    adc r1, r2, #0
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r3, r1
+; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    orr r1, r1, r6
+; CHECK-NEXT:    rsb r10, r7, #32
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    str r0, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    lsr r0, r3, r7
+; CHECK-NEXT:    subs r1, r7, #32
+; CHECK-NEXT:    orr r0, r0, lr, lsl r10
+; CHECK-NEXT:    rsb r5, r7, #128
+; CHECK-NEXT:    lsrpl r0, lr, r1
+; CHECK-NEXT:    ldr r1, [sp, #120]
+; CHECK-NEXT:    rsb r2, r7, #64
+; CHECK-NEXT:    rsb r9, r5, #64
+; CHECK-NEXT:    ldr r11, [sp, #124]
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    rsb r6, r9, #32
+; CHECK-NEXT:    str r2, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    lsl r2, r1, r2
+; CHECK-NEXT:    lsr r1, r1, r9
+; CHECK-NEXT:    orr r4, r1, r11, lsl r6
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    str r6, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    rsbs r6, r5, #32
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    lsrpl r4, r11, r6
+; CHECK-NEXT:    cmp r7, #64
+; CHECK-NEXT:    orrlo r4, r0, r2
+; CHECK-NEXT:    ldr r2, [sp, #128]
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    mov r11, r3
+; CHECK-NEXT:    moveq r4, r3
+; CHECK-NEXT:    rsbs r8, r7, #96
+; CHECK-NEXT:    lsl r0, r2, r5
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    orrlo r4, r4, r0
+; CHECK-NEXT:    mvn r0, #31
+; CHECK-NEXT:    subs r0, r0, r7
+; CHECK-NEXT:    rsb r0, r7, #0
+; CHECK-NEXT:    ldr r7, [sp, #124]
+; CHECK-NEXT:    lsl r12, r3, r0
+; CHECK-NEXT:    lsr r3, lr, r9
+; CHECK-NEXT:    movpl r12, #0
+; CHECK-NEXT:    cmp r0, #64
+; CHECK-NEXT:    movhs r12, r1
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    movlo r12, r4
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    moveq r12, r2
+; CHECK-NEXT:    ldr r2, [sp, #120]
+; CHECK-NEXT:    ldr r4, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    lsr r0, r2, r6
+; CHECK-NEXT:    orr r0, r0, r7, lsl r5
+; CHECK-NEXT:    lslpl r0, r2, r8
+; CHECK-NEXT:    rsb r2, r4, #32
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    lsr r2, r11, r2
+; CHECK-NEXT:    movpl r3, #0
+; CHECK-NEXT:    orr r2, r2, lr, lsl r4
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    lslpl r2, r11, r10
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    orrlo r2, r0, r3
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    moveq r2, r7
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    movhs r2, r1
+; CHECK-NEXT:    lsr r0, r11, r9
+; CHECK-NEXT:    str r2, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r2, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    lsl r7, r11, r4
+; CHECK-NEXT:    ldr r9, [sp, #120]
+; CHECK-NEXT:    lsl r4, r11, r5
+; CHECK-NEXT:    orr r0, r0, lr, lsl r2
+; CHECK-NEXT:    lsrpl r0, lr, r6
+; CHECK-NEXT:    lsl r2, r9, r5
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    movpl r7, #0
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    orrlo r7, r2, r0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    moveq r7, r9
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    movhs r7, r1
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    movpl r4, #0
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    movhs r4, r1
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    lsr r0, r11, r6
+; CHECK-NEXT:    movhs r4, r1
+; CHECK-NEXT:    orr r6, r0, lr, lsl r5
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    lslpl r6, r11, r8
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    movhs r6, r1
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    movhs r6, r1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    beq .LBB1_6
+; CHECK-NEXT:  @ %bb.3: @ %udiv-preheader
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    and r0, r12, #1
+; CHECK-NEXT:    ldr r6, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r3, lr
+; CHECK-NEXT:    rsb r9, r6, #32
+; CHECK-NEXT:    subs r1, r6, #32
+; CHECK-NEXT:    lsr r0, r11, r6
+; CHECK-NEXT:    rsb r5, r6, #64
+; CHECK-NEXT:    orr r0, r0, lr, lsl r9
+; CHECK-NEXT:    str r7, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    ldr r7, [sp, #124]
+; CHECK-NEXT:    lsrpl r0, lr, r1
+; CHECK-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    mov r3, r2
+; CHECK-NEXT:    lsl r1, r2, r5
+; CHECK-NEXT:    sub r2, r6, #64
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    rsb lr, r6, #96
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    subs r10, r6, #96
+; CHECK-NEXT:    str r4, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    lsr r12, r3, r2
+; CHECK-NEXT:    str r2, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    orr r2, r12, r7, lsl lr
+; CHECK-NEXT:    ldr r4, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    lsrpl r2, r7, r10
+; CHECK-NEXT:    cmp r6, #64
+; CHECK-NEXT:    rsb r12, r6, #128
+; CHECK-NEXT:    orrlo r2, r0, r1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r2, r11
+; CHECK-NEXT:    lsl r0, r4, r12
+; CHECK-NEXT:    cmp lr, #0
+; CHECK-NEXT:    sub r8, r6, #128
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    orrlo r2, r2, r0
+; CHECK-NEXT:    subs r1, r6, #160
+; CHECK-NEXT:    lsr r0, r4, r8
+; CHECK-NEXT:    str r1, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    cmp r8, #64
+; CHECK-NEXT:    rsb r7, r5, #32
+; CHECK-NEXT:    movhs r0, r1
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    moveq r0, r4
+; CHECK-NEXT:    cmp r6, #128
+; CHECK-NEXT:    movlo r0, r2
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r0, r11
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    lsr r0, r3, r7
+; CHECK-NEXT:    ldr r3, [sp, #124]
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    str r5, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    mov r11, r9
+; CHECK-NEXT:    ldr r2, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    orr r0, r0, r3, lsl r5
+; CHECK-NEXT:    ldr r5, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    lslpl r0, r1, r9
+; CHECK-NEXT:    ldr r9, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    lsr r2, r3, r2
+; CHECK-NEXT:    lsr r1, r5, r6
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r6, #64
+; CHECK-NEXT:    rsb r10, r12, #32
+; CHECK-NEXT:    orrlo r2, r1, r0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-NEXT:    moveq r2, r5
+; CHECK-NEXT:    lsr r0, r4, r10
+; CHECK-NEXT:    cmp lr, #0
+; CHECK-NEXT:    lslpl r0, r4, lr
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    orrlo r2, r2, r0
+; CHECK-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov lr, #0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r8, #64
+; CHECK-NEXT:    movhs r0, lr
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    moveq r0, r8
+; CHECK-NEXT:    cmp r6, #128
+; CHECK-NEXT:    movlo r0, r2
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r0, r5
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    lsr r0, r4, r7
+; CHECK-NEXT:    lslpl r0, r4, r11
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    mov r8, #0
+; CHECK-NEXT:    ldr r5, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    movpl r8, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    movhs r8, r0
+; CHECK-NEXT:    cmp r12, #0
+; CHECK-NEXT:    moveq r8, r12
+; CHECK-NEXT:    lsr r0, r3, r6
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r6, #64
+; CHECK-NEXT:    orrlo r8, r8, r0
+; CHECK-NEXT:    rsb r0, r12, #64
+; CHECK-NEXT:    cmp r6, #128
+; CHECK-NEXT:    mov r3, #0
+; CHECK-NEXT:    movhs r8, lr
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r8, r7
+; CHECK-NEXT:    lsr r0, r4, r0
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    lsl r4, r4, r5
+; CHECK-NEXT:    ldr r5, [sp, #120]
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    movpl r4, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    movlo r4, r0
+; CHECK-NEXT:    lsr r0, r5, r6
+; CHECK-NEXT:    cmp r12, #0
+; CHECK-NEXT:    orr r0, r0, r7, lsl r11
+; CHECK-NEXT:    moveq r4, r12
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    lsrpl r0, r7, r9
+; CHECK-NEXT:    cmp r6, #64
+; CHECK-NEXT:    orrlo r4, r4, r0
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    cmp r6, #128
+; CHECK-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #136]
+; CHECK-NEXT:    movhs r4, r3
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r10, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    moveq r4, r5
+; CHECK-NEXT:    subs r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #140]
+; CHECK-NEXT:    ldr r9, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, r0, #0
+; CHECK-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #144]
+; CHECK-NEXT:    ldr r11, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, r0, #0
+; CHECK-NEXT:    str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #148]
+; CHECK-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, r0, #0
+; CHECK-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #152]
+; CHECK-NEXT:    ldr lr, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    adc r0, r0, #1
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:  .LBB1_4: @ %udiv-do-while
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    and r6, r1, #1
+; CHECK-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    ldr r5, [sp, #136]
+; CHECK-NEXT:    subs r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r10, r10, #0
+; CHECK-NEXT:    sbcs r9, r9, #0
+; CHECK-NEXT:    sbcs r11, r11, #0
+; CHECK-NEXT:    adc r3, r12, #1
+; CHECK-NEXT:    orr r2, r10, r11
+; CHECK-NEXT:    and r12, r3, #1
+; CHECK-NEXT:    orr r3, r0, r12
+; CHECK-NEXT:    orr r3, r3, r9
+; CHECK-NEXT:    orr r0, r3, r2
+; CHECK-NEXT:    str r0, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    lsl r3, r4, #1
+; CHECK-NEXT:    lsl r2, r8, #1
+; CHECK-NEXT:    orr r2, r2, r4, lsr #31
+; CHECK-NEXT:    orr r3, r3, r0, lsr #31
+; CHECK-NEXT:    lsl r7, r0, #1
+; CHECK-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    orr r7, r7, r0, lsr #31
+; CHECK-NEXT:    orr r6, r6, r0, lsl #1
+; CHECK-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    subs r4, r0, r6
+; CHECK-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r4, r0, r7
+; CHECK-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r4, r0, r3
+; CHECK-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r4, r0, r2
+; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    sbc r4, r0, r8, lsr #31
+; CHECK-NEXT:    ldr r0, [sp, #148]
+; CHECK-NEXT:    and r4, r4, #1
+; CHECK-NEXT:    rsb r1, r4, #0
+; CHECK-NEXT:    and r8, r1, r0
+; CHECK-NEXT:    ldr r0, [sp, #144]
+; CHECK-NEXT:    and r5, r1, r5
+; CHECK-NEXT:    and r4, r1, r0
+; CHECK-NEXT:    ldr r0, [sp, #140]
+; CHECK-NEXT:    subs r5, r6, r5
+; CHECK-NEXT:    str r5, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    and r0, r1, r0
+; CHECK-NEXT:    and r5, r1, #1
+; CHECK-NEXT:    sbcs r0, r7, r0
+; CHECK-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r4, r3, r4
+; CHECK-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r8, r2, r8
+; CHECK-NEXT:    ldr r2, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    ldr r3, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    orr r7, r0, r1, lsl #1
+; CHECK-NEXT:    lsl r0, r2, #1
+; CHECK-NEXT:    orr r0, r0, r1, lsr #31
+; CHECK-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    str r7, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    orr r6, r1, r0
+; CHECK-NEXT:    lsl r0, r3, #1
+; CHECK-NEXT:    orr r0, r0, r2, lsr #31
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    orr r2, r1, r0
+; CHECK-NEXT:    lsl r0, lr, #1
+; CHECK-NEXT:    orr r0, r0, r3, lsr #31
+; CHECK-NEXT:    str r2, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    orr r3, r1, r0
+; CHECK-NEXT:    orr r0, r1, lr, lsr #31
+; CHECK-NEXT:    and r1, r0, #1
+; CHECK-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    mov lr, r3
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bne .LBB1_4
+; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:    ldr r12, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    mov r4, r7
+; CHECK-NEXT:    mov r7, r2
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov lr, r12
+; CHECK-NEXT:    b .LBB1_7
+; CHECK-NEXT:  .LBB1_6:
+; CHECK-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    mov lr, #0
+; CHECK-NEXT:    mov r12, #0
+; CHECK-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:  .LBB1_7: @ %udiv-loop-exit
+; CHECK-NEXT:    lsl r2, r6, #1
+; CHECK-NEXT:    orr r8, r1, r4, lsl #1
+; CHECK-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    orr r2, r2, r4, lsr #31
+; CHECK-NEXT:    orr r4, r1, r2
+; CHECK-NEXT:    lsl r2, r7, #1
+; CHECK-NEXT:    orr r1, r2, r6, lsr #31
+; CHECK-NEXT:    lsl r2, r3, #1
+; CHECK-NEXT:    orr r2, r2, r7, lsr #31
+; CHECK-NEXT:    orr r11, lr, r1
+; CHECK-NEXT:    orr lr, lr, r2
+; CHECK-NEXT:    orr r2, r12, r3, lsr #31
+; CHECK-NEXT:    and r0, r2, #1
+; CHECK-NEXT:  .LBB1_8: @ %udiv-end
+; CHECK-NEXT:    ldr r10, [sp, #136]
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    str r0, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    mov r9, #0
+; CHECK-NEXT:    ldr r12, [sp, #140]
+; CHECK-NEXT:    umull r0, r7, r10, r8
+; CHECK-NEXT:    umlal r7, r2, r12, r8
+; CHECK-NEXT:    str r0, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    umull r5, r6, r10, r4
+; CHECK-NEXT:    ldr lr, [sp, #144]
+; CHECK-NEXT:    umull r3, r1, lr, r8
+; CHECK-NEXT:    adds r5, r5, r7
+; CHECK-NEXT:    adcs r5, r2, r6
+; CHECK-NEXT:    umlal r7, r2, r10, r4
+; CHECK-NEXT:    ldr r10, [sp, #148]
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    adc r5, r9, #0
+; CHECK-NEXT:    umlal r1, r6, r10, r8
+; CHECK-NEXT:    umlal r2, r5, r12, r4
+; CHECK-NEXT:    umlal r1, r6, lr, r4
+; CHECK-NEXT:    adds r2, r3, r2
+; CHECK-NEXT:    mul r12, r10, r4
+; CHECK-NEXT:    ldr r10, [sp, #140]
+; CHECK-NEXT:    adcs r1, r1, r5
+; CHECK-NEXT:    adc r3, r12, r6
+; CHECK-NEXT:    ldr r6, [sp, #136]
+; CHECK-NEXT:    umull r4, r5, r6, r11
+; CHECK-NEXT:    mov r12, r6
+; CHECK-NEXT:    umlal r5, r9, r10, r11
+; CHECK-NEXT:    adds r2, r4, r2
+; CHECK-NEXT:    umlal r5, r9, r6, r0
+; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mla r6, r10, r0, r9
+; CHECK-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    adcs r1, r5, r1
+; CHECK-NEXT:    ldr r5, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    adc r3, r3, r6
+; CHECK-NEXT:    subs r6, r4, r0
+; CHECK-NEXT:    sbcs r7, r5, r7
+; CHECK-NEXT:    ldr r5, [sp, #120]
+; CHECK-NEXT:    sbcs r2, r5, r2
+; CHECK-NEXT:    ldr r5, [sp, #124]
+; CHECK-NEXT:    sbcs r1, r5, r1
+; CHECK-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    stm r5, {r6, r7}
+; CHECK-NEXT:    str r2, [r5, #8]
+; CHECK-NEXT:    str r1, [r5, #12]
+; CHECK-NEXT:    mla r1, lr, r11, r3
+; CHECK-NEXT:    ldr r3, [sp, #152]
+; CHECK-NEXT:    mul r2, r8, r3
+; CHECK-NEXT:    ldr r3, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    mla r0, r3, r12, r2
+; CHECK-NEXT:    add r0, r1, r0
+; CHECK-NEXT:    ldr r1, [sp, #128]
+; CHECK-NEXT:    sbc r0, r1, r0
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    strb r0, [r5, #16]
+; CHECK-NEXT:    add sp, sp, #84
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    mov pc, lr
+  %res = urem i129 %a, %b
+  ret i129 %res
+}
+
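+; Signed division reuses the unsigned expansion: both operands are replaced
+; by their magnitudes up front (the eor/subs/sbcs sequences against the
+; sign masks), and the quotient's sign is restored at udiv-end from the
+; XOR of the two operand signs.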
+define i129 @sdiv129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: sdiv129:
+; CHECK:       @ %bb.0: @ %_udiv-special-cases
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #124
+; CHECK-NEXT:    sub sp, sp, #124
+; CHECK-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #168]
+; CHECK-NEXT:    ldr r7, [sp, #160]
+; CHECK-NEXT:    and r1, r0, #1
+; CHECK-NEXT:    ldr r5, [sp, #176]
+; CHECK-NEXT:    rsb r10, r1, #0
+; CHECK-NEXT:    ldr r1, [sp, #164]
+; CHECK-NEXT:    eor r2, r10, r2
+; CHECK-NEXT:    eor r3, r10, r3
+; CHECK-NEXT:    subs r2, r2, r10
+; CHECK-NEXT:    eor r7, r10, r7
+; CHECK-NEXT:    sbcs r3, r3, r10
+; CHECK-NEXT:    eor r1, r10, r1
+; CHECK-NEXT:    sbcs r7, r7, r10
+; CHECK-NEXT:    eor r0, r10, r0
+; CHECK-NEXT:    sbcs r11, r1, r10
+; CHECK-NEXT:    str r2, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    sbc r0, r0, r10
+; CHECK-NEXT:    str r0, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    and r4, r0, #1
+; CHECK-NEXT:    orr r1, r3, r11
+; CHECK-NEXT:    orr r0, r2, r4
+; CHECK-NEXT:    str r7, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    orr r0, r0, r7
+; CHECK-NEXT:    ldr r6, [sp, #180]
+; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    str r3, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    rsbs r1, r0, #0
+; CHECK-NEXT:    ldr r3, [sp, #184]
+; CHECK-NEXT:    adc r0, r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #192]
+; CHECK-NEXT:    and r2, r1, #1
+; CHECK-NEXT:    rsb r7, r2, #0
+; CHECK-NEXT:    ldr r2, [sp, #188]
+; CHECK-NEXT:    eor r5, r7, r5
+; CHECK-NEXT:    eor r6, r7, r6
+; CHECK-NEXT:    subs r5, r5, r7
+; CHECK-NEXT:    eor r3, r7, r3
+; CHECK-NEXT:    sbcs r6, r6, r7
+; CHECK-NEXT:    eor r2, r7, r2
+; CHECK-NEXT:    sbcs r3, r3, r7
+; CHECK-NEXT:    eor r1, r7, r1
+; CHECK-NEXT:    sbcs r8, r2, r7
+; CHECK-NEXT:    str r5, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    sbc r1, r1, r7
+; CHECK-NEXT:    orr r2, r6, r8
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r5, r1
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r1, r3
+; CHECK-NEXT:    str r3, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    rsbs r2, r1, #0
+; CHECK-NEXT:    adc r1, r1, r2
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    str r0, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    ldr r5, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    ldr r6, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    addeq r9, r0, #32
+; CHECK-NEXT:    str r11, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    orr r11, r5, r11
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    addeq r5, r0, #32
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    addeq r9, r5, #64
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    add r5, r9, #128
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    str r4, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    addne r5, r0, #96
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    subs r0, r5, #127
+; CHECK-NEXT:    str r0, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r4, #0
+; CHECK-NEXT:    str r0, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r4, #0
+; CHECK-NEXT:    str r0, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r4, #0
+; CHECK-NEXT:    str r0, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r4, #0
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r4, #0
+; CHECK-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r4, #0
+; CHECK-NEXT:    str r0, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    sbc r0, r4, #0
+; CHECK-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    ldr r6, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    orr r9, r6, r8
+; CHECK-NEXT:    ldr r6, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    str r8, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    addeq r5, r0, #32
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r6, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    addeq r8, r0, #32
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    addeq r5, r8, #64
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    add r5, r5, #128
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    addne r5, r0, #96
+; CHECK-NEXT:    subs lr, r5, #127
+; CHECK-NEXT:    sbcs r8, r4, #0
+; CHECK-NEXT:    sbcs r2, r4, #0
+; CHECK-NEXT:    sbcs r3, r4, #0
+; CHECK-NEXT:    sbcs r6, r4, #0
+; CHECK-NEXT:    sbcs r5, r4, #0
+; CHECK-NEXT:    sbcs r0, r4, #0
+; CHECK-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    sbc r12, r4, #0
+; CHECK-NEXT:    subs r0, lr, r0
+; CHECK-NEXT:    sbcs r9, r8, r1
+; CHECK-NEXT:    ldr r1, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r8, r2, r1
+; CHECK-NEXT:    ldr r1, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r11, r3, r1
+; CHECK-NEXT:    ldr r1, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    sbcs lr, r6, r1
+; CHECK-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r1, r5, r1
+; CHECK-NEXT:    sbcs r2, r3, r2
+; CHECK-NEXT:    ldr r3, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    sbc r6, r12, r3
+; CHECK-NEXT:    rsbs r5, r0, #128
+; CHECK-NEXT:    rscs r5, r9, #0
+; CHECK-NEXT:    mov r12, r11
+; CHECK-NEXT:    rscs r5, r8, #0
+; CHECK-NEXT:    ldr r3, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    rscs r5, r11, #0
+; CHECK-NEXT:    rscs r5, lr, #0
+; CHECK-NEXT:    rscs r1, r1, #0
+; CHECK-NEXT:    rscs r1, r2, #0
+; CHECK-NEXT:    ldr r2, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    rscs r1, r6, #0
+; CHECK-NEXT:    ldr r6, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    movlo r4, #1
+; CHECK-NEXT:    orrs r5, r1, r4
+; CHECK-NEXT:    ldr r4, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    and r11, r4, #1
+; CHECK-NEXT:    eor r4, r7, r10
+; CHECK-NEXT:    ldr r1, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    and r10, r4, #1
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    movne r3, #0
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    bne .LBB2_8
+; CHECK-NEXT:  @ %bb.1: @ %_udiv-special-cases
+; CHECK-NEXT:    and lr, lr, #1
+; CHECK-NEXT:    eor r7, r0, #128
+; CHECK-NEXT:    orr r7, r7, lr
+; CHECK-NEXT:    orr r5, r9, r12
+; CHECK-NEXT:    orr r7, r7, r8
+; CHECK-NEXT:    orrs r5, r7, r5
+; CHECK-NEXT:    beq .LBB2_8
+; CHECK-NEXT:  @ %bb.2: @ %udiv-bb1
+; CHECK-NEXT:    adds r3, r0, #1
+; CHECK-NEXT:    str r3, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    adcs r1, r9, #0
+; CHECK-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    adcs r7, r8, #0
+; CHECK-NEXT:    ldr r8, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    adcs r2, r12, #0
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    adc r2, lr, #0
+; CHECK-NEXT:    and r2, r2, #1
+; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    orr r2, r3, r2
+; CHECK-NEXT:    ldr lr, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    orr r2, r2, r7
+; CHECK-NEXT:    rsb r3, r0, #32
+; CHECK-NEXT:    orr r1, r2, r1
+; CHECK-NEXT:    str r1, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    lsr r1, lr, r0
+; CHECK-NEXT:    orr r2, r1, r8, lsl r3
+; CHECK-NEXT:    subs r1, r0, #32
+; CHECK-NEXT:    str r4, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    ldr r4, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    mov r12, #0
+; CHECK-NEXT:    lsrpl r2, r8, r1
+; CHECK-NEXT:    rsb r1, r0, #64
+; CHECK-NEXT:    str r1, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    lsl r6, r4, r1
+; CHECK-NEXT:    rsb r1, r0, #128
+; CHECK-NEXT:    str r10, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    movpl r6, #0
+; CHECK-NEXT:    str r7, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    rsb r7, r1, #64
+; CHECK-NEXT:    ldr r10, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    rsb r5, r7, #32
+; CHECK-NEXT:    str r3, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    lsr r3, r4, r7
+; CHECK-NEXT:    str r5, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    mov r9, r7
+; CHECK-NEXT:    orr r3, r3, r10, lsl r5
+; CHECK-NEXT:    rsbs r5, r1, #32
+; CHECK-NEXT:    str r7, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    lsrpl r3, r10, r5
+; CHECK-NEXT:    cmp r0, #64
+; CHECK-NEXT:    orrlo r3, r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq r3, lr
+; CHECK-NEXT:    rsbs r11, r0, #96
+; CHECK-NEXT:    lsl r2, r6, r1
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r1, #64
+; CHECK-NEXT:    orrlo r3, r3, r2
+; CHECK-NEXT:    mvn r2, #31
+; CHECK-NEXT:    subs r2, r2, r0
+; CHECK-NEXT:    rsb r0, r0, #0
+; CHECK-NEXT:    lsr r2, r8, r9
+; CHECK-NEXT:    ldr r9, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    lsl r7, lr, r0
+; CHECK-NEXT:    movpl r7, #0
+; CHECK-NEXT:    cmp r0, #64
+; CHECK-NEXT:    movhs r7, r12
+; CHECK-NEXT:    cmp r1, #128
+; CHECK-NEXT:    movlo r7, r3
+; CHECK-NEXT:    rsb r3, r9, #32
+; CHECK-NEXT:    lsr r0, r4, r5
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    lsr r3, lr, r3
+; CHECK-NEXT:    moveq r7, r6
+; CHECK-NEXT:    orr r6, r3, r8, lsl r9
+; CHECK-NEXT:    orr r0, r0, r10, lsl r1
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    ldr r3, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    lslpl r0, r4, r11
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    lslpl r6, lr, r3
+; CHECK-NEXT:    cmp r1, #64
+; CHECK-NEXT:    orrlo r6, r0, r2
+; CHECK-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    moveq r6, r10
+; CHECK-NEXT:    cmp r1, #128
+; CHECK-NEXT:    lsr r0, lr, r0
+; CHECK-NEXT:    movhs r6, r12
+; CHECK-NEXT:    orr r0, r0, r8, lsl r2
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    lsrpl r0, r8, r5
+; CHECK-NEXT:    lsl r2, r4, r1
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    str r6, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    mov r6, lr
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    lsl lr, lr, r9
+; CHECK-NEXT:    movpl lr, #0
+; CHECK-NEXT:    cmp r1, #64
+; CHECK-NEXT:    orrlo lr, r2, r0
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    moveq lr, r4
+; CHECK-NEXT:    cmp r1, #128
+; CHECK-NEXT:    movhs lr, r12
+; CHECK-NEXT:    lsl r2, r6, r1
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    lsr r0, r6, r5
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r1, #64
+; CHECK-NEXT:    movhs r2, r12
+; CHECK-NEXT:    cmp r1, #128
+; CHECK-NEXT:    movhs r2, r12
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    orr r8, r0, r8, lsl r1
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    lslpl r8, r6, r11
+; CHECK-NEXT:    cmp r1, #64
+; CHECK-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    movhs r8, r12
+; CHECK-NEXT:    cmp r1, #128
+; CHECK-NEXT:    movhs r8, r12
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    beq .LBB2_6
+; CHECK-NEXT:  @ %bb.3: @ %udiv-preheader
+; CHECK-NEXT:    ldr r5, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    and r0, r7, #1
+; CHECK-NEXT:    str r0, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    rsb r11, r5, #32
+; CHECK-NEXT:    str r2, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    lsr r0, r6, r5
+; CHECK-NEXT:    ldr r12, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    orr r1, r0, r3, lsl r11
+; CHECK-NEXT:    subs r0, r5, #32
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    rsb r9, r5, #128
+; CHECK-NEXT:    lsrpl r1, r3, r0
+; CHECK-NEXT:    rsb r0, r5, #64
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    lsl r2, r4, r0
+; CHECK-NEXT:    sub r0, r5, #64
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    rsb r10, r5, #96
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    lsr r0, r4, r0
+; CHECK-NEXT:    subs r7, r5, #96
+; CHECK-NEXT:    orr r0, r0, r3, lsl r10
+; CHECK-NEXT:    str r7, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    lsrpl r0, r3, r7
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    orrlo r0, r1, r2
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    moveq r0, r6
+; CHECK-NEXT:    sub r7, r5, #128
+; CHECK-NEXT:    lsl r1, r12, r9
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r9, #64
+; CHECK-NEXT:    orrlo r0, r0, r1
+; CHECK-NEXT:    subs r2, r5, #160
+; CHECK-NEXT:    lsr r1, r12, r7
+; CHECK-NEXT:    str r2, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    cmp r7, #64
+; CHECK-NEXT:    str r10, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    movhs r1, r2
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    moveq r1, r12
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    movlo r1, r0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    moveq r1, r6
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    str r1, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    str r7, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    rsb r10, r1, #32
+; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    lsr r0, r12, r10
+; CHECK-NEXT:    lslpl r0, r12, r11
+; CHECK-NEXT:    rsbs r7, r9, #32
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r9, #64
+; CHECK-NEXT:    movhs r2, r0
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    moveq r2, r9
+; CHECK-NEXT:    lsr r0, r3, r5
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    orrlo r2, r2, r0
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movhs r2, r0
+; CHECK-NEXT:    rsb r0, r9, #64
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    moveq r2, r3
+; CHECK-NEXT:    lsr r0, r12, r0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    str r2, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    lsl r2, r12, r1
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r9, #64
+; CHECK-NEXT:    movlo r2, r0
+; CHECK-NEXT:    lsr r0, r4, r5
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    orr r0, r0, r3, lsl r11
+; CHECK-NEXT:    moveq r2, r9
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    lsrpl r0, r3, r6
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    orrlo r2, r2, r0
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movhs r2, r0
+; CHECK-NEXT:    lsr r0, r4, r10
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    orr r0, r0, r3, lsl r1
+; CHECK-NEXT:    moveq r2, r4
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    ldr r1, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    lslpl r0, r4, r11
+; CHECK-NEXT:    ldr r4, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    str r2, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    mov r11, #0
+; CHECK-NEXT:    lsr r3, r3, r1
+; CHECK-NEXT:    ldr r1, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    lsr r2, r4, r5
+; CHECK-NEXT:    mov r6, lr
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    movpl r3, #0
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    orrlo r3, r2, r0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    lsr r0, r12, r7
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    lslpl r0, r12, r1
+; CHECK-NEXT:    cmp r9, #64
+; CHECK-NEXT:    orrlo r3, r3, r0
+; CHECK-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    mov r9, r8
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movpl r11, #0
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    cmp r2, #64
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movhs r11, r0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    moveq r11, r2
+; CHECK-NEXT:    cmp r5, #128
+; CHECK-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    movlo r11, r3
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    ldr r2, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    moveq r11, r4
+; CHECK-NEXT:    subs r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    mov r3, #0
+; CHECK-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    ldr lr, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, r0, #0
+; CHECK-NEXT:    str r0, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    ldr r8, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, r0, #0
+; CHECK-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r2, #0
+; CHECK-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    ldr r10, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    adc r0, r0, #1
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:  .LBB2_4: @ %udiv-do-while
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    str r3, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    ldr r3, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    lsl r5, r1, #1
+; CHECK-NEXT:    str r6, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    subs r3, r3, #1
+; CHECK-NEXT:    str r3, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    sbcs lr, lr, #0
+; CHECK-NEXT:    sbcs r8, r8, #0
+; CHECK-NEXT:    sbcs r10, r10, #0
+; CHECK-NEXT:    adc r7, r7, #1
+; CHECK-NEXT:    orr r0, lr, r10
+; CHECK-NEXT:    and r11, r7, #1
+; CHECK-NEXT:    orr r7, r3, r11
+; CHECK-NEXT:    ldr r3, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    orr r7, r7, r8
+; CHECK-NEXT:    orr r0, r7, r0
+; CHECK-NEXT:    ldr r7, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    lsl r6, r3, #1
+; CHECK-NEXT:    str r0, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    orr r6, r6, r1, lsr #31
+; CHECK-NEXT:    ldr r1, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    lsl r0, r7, #1
+; CHECK-NEXT:    orr r0, r0, r3, lsr #31
+; CHECK-NEXT:    ldr r3, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    and r4, r1, #1
+; CHECK-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    orr r4, r4, r3, lsl #1
+; CHECK-NEXT:    orr r5, r5, r3, lsr #31
+; CHECK-NEXT:    subs r3, r1, r4
+; CHECK-NEXT:    ldr r1, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r3, r1, r5
+; CHECK-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r3, r1, r6
+; CHECK-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r3, r1, r0
+; CHECK-NEXT:    ldr r1, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    sbc r1, r1, r7, lsr #31
+; CHECK-NEXT:    ldr r7, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    rsb r3, r1, #0
+; CHECK-NEXT:    ldr r1, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    and r12, r3, r2
+; CHECK-NEXT:    and r7, r3, r7
+; CHECK-NEXT:    and r2, r3, r1
+; CHECK-NEXT:    ldr r1, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    subs r4, r4, r7
+; CHECK-NEXT:    mov r7, r11
+; CHECK-NEXT:    and r1, r3, r1
+; CHECK-NEXT:    str r4, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r11, r5, r1
+; CHECK-NEXT:    ldr r5, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r2, r6, r2
+; CHECK-NEXT:    str r2, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r0, r12
+; CHECK-NEXT:    ldr r2, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    and r12, r3, #1
+; CHECK-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    orr r4, r0, r2, lsl #1
+; CHECK-NEXT:    lsl r0, r9, #1
+; CHECK-NEXT:    orr r0, r0, r2, lsr #31
+; CHECK-NEXT:    lsl r2, r5, #1
+; CHECK-NEXT:    ldr r3, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    orr r2, r2, r9, lsr #31
+; CHECK-NEXT:    str r4, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r3, r2
+; CHECK-NEXT:    lsl r2, r6, #1
+; CHECK-NEXT:    orr r2, r2, r5, lsr #31
+; CHECK-NEXT:    orr r0, r3, r0
+; CHECK-NEXT:    orr r5, r3, r2
+; CHECK-NEXT:    orr r2, r3, r6, lsr #31
+; CHECK-NEXT:    and r2, r2, #1
+; CHECK-NEXT:    str r2, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    ldr r3, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    ldr r2, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mov r3, r12
+; CHECK-NEXT:    str r5, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    bne .LBB2_4
+; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:    ldr r8, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov lr, r1
+; CHECK-NEXT:    ldr r10, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    ldr r4, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r8
+; CHECK-NEXT:    mov r9, r8
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    b .LBB2_7
+; CHECK-NEXT:  .LBB2_6:
+; CHECK-NEXT:    ldr r4, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    ldr r5, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    ldr r10, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r9, #0
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:  .LBB2_7: @ %udiv-loop-exit
+; CHECK-NEXT:    lsl r0, r8, #1
+; CHECK-NEXT:    orr r6, r12, r2, lsl #1
+; CHECK-NEXT:    orr r0, r0, r2, lsr #31
+; CHECK-NEXT:    ldr r2, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    orr r3, r2, r0
+; CHECK-NEXT:    lsl r0, lr, #1
+; CHECK-NEXT:    orr r0, r0, r8, lsr #31
+; CHECK-NEXT:    orr r2, r7, r0
+; CHECK-NEXT:    lsl r0, r5, #1
+; CHECK-NEXT:    orr r0, r0, lr, lsr #31
+; CHECK-NEXT:    orr r1, r7, r0
+; CHECK-NEXT:    orr r0, r9, r5, lsr #31
+; CHECK-NEXT:    and r11, r0, #1
+; CHECK-NEXT:  .LBB2_8: @ %udiv-end
+; CHECK-NEXT:    eor r0, r6, r4
+; CHECK-NEXT:    eor r3, r3, r4
+; CHECK-NEXT:    subs r0, r0, r4
+; CHECK-NEXT:    eor r2, r2, r4
+; CHECK-NEXT:    sbcs r3, r3, r4
+; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r2, r2, r4
+; CHECK-NEXT:    eor r1, r1, r4
+; CHECK-NEXT:    stm r7, {r0, r3}
+; CHECK-NEXT:    sbcs r1, r1, r4
+; CHECK-NEXT:    eor r0, r11, r10
+; CHECK-NEXT:    sbc r0, r0, r10
+; CHECK-NEXT:    str r2, [r7, #8]
+; CHECK-NEXT:    str r1, [r7, #12]
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    strb r0, [r7, #16]
+; CHECK-NEXT:    add sp, sp, #124
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    mov pc, lr
+  %res = sdiv i129 %a, %b
+  ret i129 %res
+}
+
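+; A brief note on the structure (descriptive only, not matched by FileCheck):
+; srem is lowered through the same unsigned shift-and-subtract loop as sdiv
+; above; the remainder is then reconstructed as a - (a / b) * b (the
+; umull/umlal/mla widening multiplies followed by the subs/sbcs chain in the
+; checks below), and the dividend's sign is reapplied at the end, since srem
+; takes the sign of its first operand.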
+define i129 @srem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: srem129:
+; CHECK:       @ %bb.0: @ %_udiv-special-cases
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #124
+; CHECK-NEXT:    sub sp, sp, #124
+; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #168]
+; CHECK-NEXT:    ldr r7, [sp, #160]
+; CHECK-NEXT:    and r1, r0, #1
+; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    rsb r6, r1, #0
+; CHECK-NEXT:    ldr r1, [sp, #164]
+; CHECK-NEXT:    eor r2, r2, r6
+; CHECK-NEXT:    eor r3, r3, r6
+; CHECK-NEXT:    subs r2, r2, r6
+; CHECK-NEXT:    eor r7, r7, r6
+; CHECK-NEXT:    sbcs r3, r3, r6
+; CHECK-NEXT:    eor r1, r1, r6
+; CHECK-NEXT:    sbcs r4, r7, r6
+; CHECK-NEXT:    eor r0, r0, r6
+; CHECK-NEXT:    sbcs r11, r1, r6
+; CHECK-NEXT:    str r2, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    sbc r0, r0, r6
+; CHECK-NEXT:    str r0, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    and r9, r0, #1
+; CHECK-NEXT:    orr r1, r3, r11
+; CHECK-NEXT:    orr r0, r2, r9
+; CHECK-NEXT:    ldr r5, [sp, #176]
+; CHECK-NEXT:    orr r0, r0, r4
+; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    ldr r6, [sp, #180]
+; CHECK-NEXT:    rsbs r1, r0, #0
+; CHECK-NEXT:    ldr r7, [sp, #184]
+; CHECK-NEXT:    adc r0, r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #192]
+; CHECK-NEXT:    str r3, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    and r2, r1, #1
+; CHECK-NEXT:    ldr r3, [sp, #188]
+; CHECK-NEXT:    rsb r2, r2, #0
+; CHECK-NEXT:    eor r5, r5, r2
+; CHECK-NEXT:    eor r6, r6, r2
+; CHECK-NEXT:    subs r5, r5, r2
+; CHECK-NEXT:    eor r7, r7, r2
+; CHECK-NEXT:    sbcs r6, r6, r2
+; CHECK-NEXT:    eor r3, r3, r2
+; CHECK-NEXT:    sbcs r8, r7, r2
+; CHECK-NEXT:    eor r1, r1, r2
+; CHECK-NEXT:    sbcs r7, r3, r2
+; CHECK-NEXT:    str r5, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    sbc r1, r1, r2
+; CHECK-NEXT:    orr r3, r6, r7
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    str r1, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r5, r1
+; CHECK-NEXT:    str r6, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r1, r8
+; CHECK-NEXT:    orr r1, r1, r3
+; CHECK-NEXT:    rsbs r2, r1, #0
+; CHECK-NEXT:    adc r1, r1, r2
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    str r0, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    str r4, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    ldr r5, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    addeq r10, r0, #32
+; CHECK-NEXT:    str r11, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    orr r4, r4, r11
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    addeq r11, r0, #32
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    addeq r10, r11, #64
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    add r4, r10, #128
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    str r9, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    addne r4, r0, #96
+; CHECK-NEXT:    mov r5, #0
+; CHECK-NEXT:    subs r10, r4, #127
+; CHECK-NEXT:    sbcs r9, r5, #0
+; CHECK-NEXT:    sbcs r11, r5, #0
+; CHECK-NEXT:    sbcs r0, r5, #0
+; CHECK-NEXT:    str r0, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r5, #0
+; CHECK-NEXT:    str r0, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r5, #0
+; CHECK-NEXT:    str r0, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r0, r5, #0
+; CHECK-NEXT:    str r0, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    sbc r0, r5, #0
+; CHECK-NEXT:    str r0, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    str r8, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    str r7, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    orr r8, r8, r7
+; CHECK-NEXT:    ldr r7, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    addeq r6, r0, #32
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    addeq r4, r0, #32
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    addeq r6, r4, #64
+; CHECK-NEXT:    add r4, r6, #128
+; CHECK-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    bl __clzsi2
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r8, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    addne r4, r0, #96
+; CHECK-NEXT:    subs r0, r4, #127
+; CHECK-NEXT:    sbcs r1, r5, #0
+; CHECK-NEXT:    sbcs r2, r5, #0
+; CHECK-NEXT:    sbcs r3, r5, #0
+; CHECK-NEXT:    sbcs r4, r5, #0
+; CHECK-NEXT:    sbcs lr, r5, #0
+; CHECK-NEXT:    sbcs r7, r5, #0
+; CHECK-NEXT:    sbc r12, r5, #0
+; CHECK-NEXT:    subs r6, r0, r10
+; CHECK-NEXT:    sbcs r10, r1, r9
+; CHECK-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r11, r2, r11
+; CHECK-NEXT:    ldr r1, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r9, r3, r0
+; CHECK-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r4, r4, r0
+; CHECK-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, lr, r0
+; CHECK-NEXT:    ldr lr, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r1, r7, r1
+; CHECK-NEXT:    sbc r2, r12, r2
+; CHECK-NEXT:    rsbs r3, r6, #128
+; CHECK-NEXT:    rscs r3, r10, #0
+; CHECK-NEXT:    rscs r3, r11, #0
+; CHECK-NEXT:    rscs r3, r9, #0
+; CHECK-NEXT:    rscs r3, r4, #0
+; CHECK-NEXT:    rscs r0, r0, #0
+; CHECK-NEXT:    ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    rscs r0, r1, #0
+; CHECK-NEXT:    ldr r1, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    rscs r0, r2, #0
+; CHECK-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlo r0, #1
+; CHECK-NEXT:    orrs r5, r1, r0
+; CHECK-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    movne lr, #0
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    movne r3, #0
+; CHECK-NEXT:    movne r8, #0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    bne .LBB3_8
+; CHECK-NEXT:  @ %bb.1: @ %_udiv-special-cases
+; CHECK-NEXT:    and r4, r4, #1
+; CHECK-NEXT:    eor r7, r6, #128
+; CHECK-NEXT:    orr r7, r7, r4
+; CHECK-NEXT:    orr r5, r10, r9
+; CHECK-NEXT:    orr r7, r7, r11
+; CHECK-NEXT:    orrs r5, r7, r5
+; CHECK-NEXT:    beq .LBB3_8
+; CHECK-NEXT:  @ %bb.2: @ %udiv-bb1
+; CHECK-NEXT:    adds r2, r6, #1
+; CHECK-NEXT:    ldr lr, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    adcs r0, r10, #0
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    adcs r3, r11, #0
+; CHECK-NEXT:    ldr r12, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    adcs r1, r9, #0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    adc r1, r4, #0
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r2, r1
+; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r1, r3
+; CHECK-NEXT:    rsb r3, r6, #32
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    str r0, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    lsr r0, lr, r6
+; CHECK-NEXT:    subs r1, r6, #32
+; CHECK-NEXT:    orr r0, r0, r12, lsl r3
+; CHECK-NEXT:    ldr r5, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    rsb r8, r6, #128
+; CHECK-NEXT:    lsrpl r0, r12, r1
+; CHECK-NEXT:    rsb r1, r6, #64
+; CHECK-NEXT:    str r3, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    rsb r3, r8, #64
+; CHECK-NEXT:    str r2, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    lsl r2, r5, r1
+; CHECK-NEXT:    str r1, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    rsb r4, r3, #32
+; CHECK-NEXT:    lsr r1, r5, r3
+; CHECK-NEXT:    mov r11, r3
+; CHECK-NEXT:    str r3, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    rsbs r7, r8, #32
+; CHECK-NEXT:    str r4, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    orr r1, r1, r3, lsl r4
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    lsrpl r1, r3, r7
+; CHECK-NEXT:    cmp r6, #64
+; CHECK-NEXT:    orrlo r1, r0, r2
+; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r1, lr
+; CHECK-NEXT:    rsbs r10, r6, #96
+; CHECK-NEXT:    lsl r0, r2, r8
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r8, #64
+; CHECK-NEXT:    orrlo r1, r1, r0
+; CHECK-NEXT:    mvn r0, #31
+; CHECK-NEXT:    subs r0, r0, r6
+; CHECK-NEXT:    rsb r0, r6, #0
+; CHECK-NEXT:    mov r6, r3
+; CHECK-NEXT:    lsl r9, lr, r0
+; CHECK-NEXT:    movpl r9, #0
+; CHECK-NEXT:    cmp r0, #64
+; CHECK-NEXT:    lsr r0, r5, r7
+; CHECK-NEXT:    movhs r9, r4
+; CHECK-NEXT:    cmp r8, #128
+; CHECK-NEXT:    orr r0, r0, r3, lsl r8
+; CHECK-NEXT:    ldr r3, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    movlo r9, r1
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    lsr r1, r12, r11
+; CHECK-NEXT:    moveq r9, r2
+; CHECK-NEXT:    rsb r2, r3, #32
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    ldr r11, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    lsr r2, lr, r2
+; CHECK-NEXT:    lslpl r0, r5, r10
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    orr r2, r2, r12, lsl r3
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    lslpl r2, lr, r11
+; CHECK-NEXT:    cmp r8, #64
+; CHECK-NEXT:    orrlo r2, r0, r1
+; CHECK-NEXT:    ldr r0, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    moveq r2, r6
+; CHECK-NEXT:    cmp r8, #128
+; CHECK-NEXT:    lsr r0, lr, r0
+; CHECK-NEXT:    movhs r2, r4
+; CHECK-NEXT:    orr r0, r0, r12, lsl r1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    lsrpl r0, r12, r7
+; CHECK-NEXT:    lsl r1, r5, r8
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    str r2, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    lsl r2, lr, r3
+; CHECK-NEXT:    cmp r11, #0
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r8, #64
+; CHECK-NEXT:    orrlo r2, r1, r0
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    moveq r2, r5
+; CHECK-NEXT:    cmp r8, #128
+; CHECK-NEXT:    movhs r2, r4
+; CHECK-NEXT:    lsl r1, lr, r8
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    lsr r0, lr, r7
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r8, #64
+; CHECK-NEXT:    movhs r1, r4
+; CHECK-NEXT:    cmp r8, #128
+; CHECK-NEXT:    movhs r1, r4
+; CHECK-NEXT:    orr r6, r0, r12, lsl r8
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    lslpl r6, lr, r10
+; CHECK-NEXT:    cmp r8, #64
+; CHECK-NEXT:    movhs r6, r4
+; CHECK-NEXT:    cmp r8, #128
+; CHECK-NEXT:    movhs r6, r4
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    str r2, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    beq .LBB3_6
+; CHECK-NEXT:  @ %bb.3: @ %udiv-preheader
+; CHECK-NEXT:    ldr r3, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r5
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    str r1, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    rsb r5, r3, #32
+; CHECK-NEXT:    subs r8, r3, #32
+; CHECK-NEXT:    lsr r0, lr, r3
+; CHECK-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    orr r12, r0, r12, lsl r5
+; CHECK-NEXT:    rsb r0, r3, #64
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    lsrpl r12, r2, r8
+; CHECK-NEXT:    lsl r1, r10, r0
+; CHECK-NEXT:    sub r0, r3, #64
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    rsb r7, r3, #96
+; CHECK-NEXT:    lsr r2, r10, r0
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    str r0, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    subs r0, r3, #96
+; CHECK-NEXT:    orr r2, r2, r4, lsl r7
+; CHECK-NEXT:    ldr r11, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    lsrpl r2, r4, r0
+; CHECK-NEXT:    cmp r3, #64
+; CHECK-NEXT:    orrlo r2, r12, r1
+; CHECK-NEXT:    rsb r12, r3, #128
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    moveq r2, lr
+; CHECK-NEXT:    str r7, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    lsl r0, r11, r12
+; CHECK-NEXT:    sub r7, r3, #128
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    str r7, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    orrlo r2, r2, r0
+; CHECK-NEXT:    subs r1, r3, #160
+; CHECK-NEXT:    lsr r0, r11, r7
+; CHECK-NEXT:    str r1, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    cmp r7, #64
+; CHECK-NEXT:    str r6, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    movhs r0, r1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    moveq r0, r11
+; CHECK-NEXT:    cmp r3, #128
+; CHECK-NEXT:    movlo r0, r2
+; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    rsb r7, r2, #32
+; CHECK-NEXT:    moveq r0, lr
+; CHECK-NEXT:    str r0, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    lsr r0, r11, r7
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    lslpl r0, r11, r5
+; CHECK-NEXT:    rsbs lr, r12, #32
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    movhs r1, r0
+; CHECK-NEXT:    cmp r12, #0
+; CHECK-NEXT:    moveq r1, r12
+; CHECK-NEXT:    lsr r0, r4, r3
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    str lr, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    cmp r3, #64
+; CHECK-NEXT:    orrlo r1, r1, r0
+; CHECK-NEXT:    rsb r0, r12, #64
+; CHECK-NEXT:    cmp r3, #128
+; CHECK-NEXT:    and r9, r9, #1
+; CHECK-NEXT:    movhs r1, r6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    moveq r1, r4
+; CHECK-NEXT:    cmp lr, #0
+; CHECK-NEXT:    lsr r0, r11, r0
+; CHECK-NEXT:    str r1, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    movpl r0, #0
+; CHECK-NEXT:    lsl r1, r11, r2
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    movpl r1, #0
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    movlo r1, r0
+; CHECK-NEXT:    lsr r0, r10, r3
+; CHECK-NEXT:    cmp r12, #0
+; CHECK-NEXT:    orr r0, r0, r4, lsl r5
+; CHECK-NEXT:    moveq r1, r12
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    lsrpl r0, r4, r8
+; CHECK-NEXT:    cmp r3, #64
+; CHECK-NEXT:    orrlo r1, r1, r0
+; CHECK-NEXT:    lsr r0, r10, r7
+; CHECK-NEXT:    cmp r3, #128
+; CHECK-NEXT:    orr r0, r0, r4, lsl r2
+; CHECK-NEXT:    ldr r2, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    movhs r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    moveq r1, r10
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    lsr r2, r4, r2
+; CHECK-NEXT:    ldr r4, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    lslpl r0, r10, r5
+; CHECK-NEXT:    lsr r5, r6, r3
+; CHECK-NEXT:    cmp r8, #0
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    movpl r5, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    movpl r2, #0
+; CHECK-NEXT:    cmp r3, #64
+; CHECK-NEXT:    orrlo r2, r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    ldr r5, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    moveq r2, r6
+; CHECK-NEXT:    mov lr, #0
+; CHECK-NEXT:    lsr r0, r11, r0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    lslpl r0, r11, r5
+; CHECK-NEXT:    cmp r12, #64
+; CHECK-NEXT:    orrlo r2, r2, r0
+; CHECK-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    ldr r5, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movpl r7, #0
+; CHECK-NEXT:    cmp r5, #64
+; CHECK-NEXT:    movhs r7, lr
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    str r0, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    moveq r7, r5
+; CHECK-NEXT:    cmp r3, #128
+; CHECK-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    movlo r7, r2
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    moveq r7, r6
+; CHECK-NEXT:    subs r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    ldr lr, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, r0, #0
+; CHECK-NEXT:    str r0, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    ldr r12, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, r0, #0
+; CHECK-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    ldmib sp, {r8, r10, r11} @ 12-byte Folded Reload
+; CHECK-NEXT:    sbcs r0, r0, #0
+; CHECK-NEXT:    str r0, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    adc r0, r0, #1
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:  .LBB3_4: @ %udiv-do-while
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    str r0, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    mov r4, r9
+; CHECK-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    lsl r6, r7, #1
+; CHECK-NEXT:    ldr r3, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    and r5, r4, #1
+; CHECK-NEXT:    subs r0, r0, #1
+; CHECK-NEXT:    str r0, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r11, r11, #0
+; CHECK-NEXT:    ldr r9, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r10, r10, #0
+; CHECK-NEXT:    sbcs r8, r8, #0
+; CHECK-NEXT:    adc r3, r3, #1
+; CHECK-NEXT:    orr r2, r11, r8
+; CHECK-NEXT:    and r3, r3, #1
+; CHECK-NEXT:    str r3, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    orr r3, r0, r3
+; CHECK-NEXT:    orr r3, r3, r10
+; CHECK-NEXT:    orr r0, r3, r2
+; CHECK-NEXT:    str r0, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    lsl r3, r1, #1
+; CHECK-NEXT:    orr r3, r3, r7, lsr #31
+; CHECK-NEXT:    lsl r2, r9, #1
+; CHECK-NEXT:    orr r2, r2, r1, lsr #31
+; CHECK-NEXT:    ldr r7, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    orr r6, r6, r0, lsr #31
+; CHECK-NEXT:    orr r5, r5, r0, lsl #1
+; CHECK-NEXT:    ldr r0, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    subs r4, r0, r5
+; CHECK-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r4, r0, r6
+; CHECK-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r4, r0, r3
+; CHECK-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r4, r0, r2
+; CHECK-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    sbc r4, r0, r9, lsr #31
+; CHECK-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    and r4, r4, #1
+; CHECK-NEXT:    rsb r4, r4, #0
+; CHECK-NEXT:    and r9, r4, r0
+; CHECK-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    and r7, r4, r7
+; CHECK-NEXT:    and r1, r4, r1
+; CHECK-NEXT:    subs r5, r5, r7
+; CHECK-NEXT:    and r0, r4, r0
+; CHECK-NEXT:    sbcs r7, r6, r1
+; CHECK-NEXT:    str r5, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    sbcs r1, r3, r0
+; CHECK-NEXT:    ldr r3, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r0, r2, r9
+; CHECK-NEXT:    ldr r2, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    and r4, r4, #1
+; CHECK-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    ldr r9, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    orr r5, r0, r2, lsl #1
+; CHECK-NEXT:    lsl r0, r3, #1
+; CHECK-NEXT:    orr r0, r0, r2, lsr #31
+; CHECK-NEXT:    str r5, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    orr r6, r9, r0
+; CHECK-NEXT:    lsl r0, r12, #1
+; CHECK-NEXT:    orr r0, r0, r3, lsr #31
+; CHECK-NEXT:    str r6, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    orr r2, r9, r0
+; CHECK-NEXT:    lsl r0, lr, #1
+; CHECK-NEXT:    orr r0, r0, r12, lsr #31
+; CHECK-NEXT:    orr r3, r9, r0
+; CHECK-NEXT:    orr r0, r9, lr, lsr #31
+; CHECK-NEXT:    and r9, r0, #1
+; CHECK-NEXT:    ldr r0, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    mov lr, r3
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    bne .LBB3_4
+; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:    ldr r12, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r5, r2
+; CHECK-NEXT:    mov r0, r3
+; CHECK-NEXT:    mov r7, r12
+; CHECK-NEXT:    b .LBB3_7
+; CHECK-NEXT:  .LBB3_6:
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    ldr r5, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    mov r12, #0
+; CHECK-NEXT:  .LBB3_7: @ %udiv-loop-exit
+; CHECK-NEXT:    lsl r2, r6, #1
+; CHECK-NEXT:    orr r8, r4, r1, lsl #1
+; CHECK-NEXT:    orr r2, r2, r1, lsr #31
+; CHECK-NEXT:    ldr r1, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    orr r3, r1, r2
+; CHECK-NEXT:    lsl r2, r5, #1
+; CHECK-NEXT:    orr r1, r2, r6, lsr #31
+; CHECK-NEXT:    lsl r2, r0, #1
+; CHECK-NEXT:    orr r2, r2, r5, lsr #31
+; CHECK-NEXT:    orr lr, r7, r1
+; CHECK-NEXT:    orr r2, r7, r2
+; CHECK-NEXT:    orr r7, r12, r0, lsr #31
+; CHECK-NEXT:    and r0, r7, #1
+; CHECK-NEXT:  .LBB3_8: @ %udiv-end
+; CHECK-NEXT:    str r2, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    mov r5, #0
+; CHECK-NEXT:    ldr r2, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    ldr r10, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    mov r12, #0
+; CHECK-NEXT:    ldr r11, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    umull r6, r7, r2, r8
+; CHECK-NEXT:    str r0, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    mul r0, r10, r3
+; CHECK-NEXT:    str r6, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    umlal r7, r5, r11, r8
+; CHECK-NEXT:    str r0, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    umull r4, r6, r2, r3
+; CHECK-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    adds r4, r4, r7
+; CHECK-NEXT:    adcs r4, r5, r6
+; CHECK-NEXT:    umull r9, r6, r0, r8
+; CHECK-NEXT:    adc r4, r12, #0
+; CHECK-NEXT:    umlal r7, r5, r2, r3
+; CHECK-NEXT:    umlal r6, r1, r10, r8
+; CHECK-NEXT:    umlal r5, r4, r11, r3
+; CHECK-NEXT:    umlal r6, r1, r0, r3
+; CHECK-NEXT:    adds r9, r9, r5
+; CHECK-NEXT:    adcs r3, r6, r4
+; CHECK-NEXT:    ldr r4, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    adc r10, r4, r1
+; CHECK-NEXT:    umull r4, r5, r2, lr
+; CHECK-NEXT:    ldr r1, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    umlal r5, r12, r11, lr
+; CHECK-NEXT:    umlal r5, r12, r2, r1
+; CHECK-NEXT:    mla r6, r11, r1, r12
+; CHECK-NEXT:    adds r12, r4, r9
+; CHECK-NEXT:    adcs r3, r5, r3
+; CHECK-NEXT:    adc r1, r10, r6
+; CHECK-NEXT:    ldr r6, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    mla r4, r0, lr, r1
+; CHECK-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r8, r0
+; CHECK-NEXT:    mla r0, r6, r2, r1
+; CHECK-NEXT:    ldr r1, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    subs r1, r1, r2
+; CHECK-NEXT:    sbcs r7, r6, r7
+; CHECK-NEXT:    ldr r6, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    add r0, r4, r0
+; CHECK-NEXT:    sbcs r2, r6, r12
+; CHECK-NEXT:    ldr r6, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    sbcs r3, r6, r3
+; CHECK-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    sbc r0, r6, r0
+; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    eor r1, r1, r6
+; CHECK-NEXT:    eor r7, r7, r6
+; CHECK-NEXT:    subs r1, r1, r6
+; CHECK-NEXT:    eor r2, r2, r6
+; CHECK-NEXT:    sbcs r7, r7, r6
+; CHECK-NEXT:    eor r3, r3, r6
+; CHECK-NEXT:    sbcs r2, r2, r6
+; CHECK-NEXT:    sbcs r3, r3, r6
+; CHECK-NEXT:    ldr r6, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    stm r6, {r1, r7}
+; CHECK-NEXT:    str r2, [r6, #8]
+; CHECK-NEXT:    str r3, [r6, #12]
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    eor r0, r0, r1
+; CHECK-NEXT:    sbc r0, r0, r1
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    strb r0, [r6, #16]
+; CHECK-NEXT:    add sp, sp, #124
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    mov pc, lr
+  %res = srem i129 %a, %b
+  ret i129 %res
+}
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -17,6 +17,7 @@
 ; CHECK-NEXT:   ModulePass Manager
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Expand large div/rem
 ; CHECK-NEXT:       Expand Atomic instructions
 ; CHECK-NEXT:       Lower AMX intrinsics
 ; CHECK-NEXT:       Lower AMX type for load/store
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -26,6 +26,7 @@
 ; CHECK-NEXT:   ModulePass Manager
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Expand large div/rem
 ; CHECK-NEXT:       Expand Atomic instructions
 ; CHECK-NEXT:       Lower AMX intrinsics
 ; CHECK-NEXT:       Lower AMX type for load/store
diff --git a/llvm/test/CodeGen/X86/udivmodei5.ll b/llvm/test/CodeGen/X86/udivmodei5.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/udivmodei5.ll
@@ -0,0 +1,42376 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
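+; Note (descriptive only, not matched by FileCheck): i129 is wider than any
+; legal x86 integer type, and no standard runtime libcall covers divisions
+; beyond 128 bits, so the "Expand large div/rem" pass lowers udiv inline.
+; The CHECK lines below follow the expansion's _udiv-special-cases /
+; udiv-bb1 / udiv-preheader / udiv-do-while block structure.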
+define i129 @udiv129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: udiv129:
+; X86:       # %bb.0: # %_udiv-special-cases
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $88, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    sete (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    sete %bl
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    jne .LBB0_1
+; X86-NEXT:  # %bb.2: # %_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bsrl %ecx, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:    movb (%esp), %bh # 1-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    je .LBB0_5
+; X86-NEXT:  .LBB0_4:
+; X86-NEXT:    bsrl %esi, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    je .LBB0_7
+; X86-NEXT:    jmp .LBB0_8
+; X86-NEXT:  .LBB0_1:
+; X86-NEXT:    bsrl %edi, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    movb (%esp), %bh # 1-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    jne .LBB0_4
+; X86-NEXT:  .LBB0_5: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    jne .LBB0_8
+; X86-NEXT:  .LBB0_7: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:  .LBB0_8: # %_udiv-special-cases
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    jne .LBB0_9
+; X86-NEXT:  # %bb.10: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %ebp
+; X86-NEXT:    jmp .LBB0_11
+; X86-NEXT:  .LBB0_9:
+; X86-NEXT:    bsrl %edx, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $96, %ebp
+; X86-NEXT:  .LBB0_11: # %_udiv-special-cases
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    subl $127, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB0_12
+; X86-NEXT:  # %bb.13: # %_udiv-special-cases
+; X86-NEXT:    bsrl %edi, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB0_14
+; X86-NEXT:  .LBB0_12:
+; X86-NEXT:    bsrl %eax, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB0_14: # %_udiv-special-cases
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB0_15
+; X86-NEXT:  # %bb.16: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:    jmp .LBB0_17
+; X86-NEXT:  .LBB0_15:
+; X86-NEXT:    bsrl %esi, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:  .LBB0_17: # %_udiv-special-cases
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    jne .LBB0_19
+; X86-NEXT:  # %bb.18: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebp
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB0_19: # %_udiv-special-cases
+; X86-NEXT:    orb %bl, %bh
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movb %bh, (%esp) # 1-byte Spill
+; X86-NEXT:    jne .LBB0_20
+; X86-NEXT:  # %bb.21: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %eax
+; X86-NEXT:    jmp .LBB0_22
+; X86-NEXT:  .LBB0_20:
+; X86-NEXT:    bsrl %ecx, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $96, %eax
+; X86-NEXT:  .LBB0_22: # %_udiv-special-cases
+; X86-NEXT:    subl $127, %eax
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl %ebx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl $128, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl %ecx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    setb %al
+; X86-NEXT:    movb (%esp), %cl # 1-byte Reload
+; X86-NEXT:    orb %al, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    jne .LBB0_24
+; X86-NEXT:  # %bb.23: # %_udiv-special-cases
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB0_24: # %_udiv-special-cases
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    jne .LBB0_26
+; X86-NEXT:  # %bb.25: # %_udiv-special-cases
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB0_26: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB0_27
+; X86-NEXT:  # %bb.139: # %_udiv-special-cases
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl $128, %esi
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB0_140
+; X86-NEXT:  # %bb.95: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movb %al, %ch
+; X86-NEXT:    subb $64, %ch
+; X86-NEXT:    negb %ch
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB0_97
+; X86-NEXT:  # %bb.96: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB0_97: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $-128, %cl
+; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    shldl %cl, %esi, %ebx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, (%esp) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB0_99
+; X86-NEXT:  # %bb.98:
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB0_99: # %udiv-bb1
+; X86-NEXT:    movb (%esp), %cl # 1-byte Reload
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jb .LBB0_101
+; X86-NEXT:  # %bb.100: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB0_101: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB0_103
+; X86-NEXT:  # %bb.102: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_103: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB0_105
+; X86-NEXT:  # %bb.104: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB0_105: # %udiv-bb1
+; X86-NEXT:    cmpb $64, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    jae .LBB0_107
+; X86-NEXT:  # %bb.106:
+; X86-NEXT:    orl %ecx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_107: # %udiv-bb1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrdl %cl, %eax, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB0_109
+; X86-NEXT:  # %bb.108: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB0_109: # %udiv-bb1
+; X86-NEXT:    cmpb $64, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB0_111
+; X86-NEXT:  # %bb.110:
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_111: # %udiv-bb1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB0_113
+; X86-NEXT:  # %bb.112: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB0_113: # %udiv-bb1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shrdl %cl, %edx, %ebx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB0_115
+; X86-NEXT:  # %bb.114: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB0_115: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    jae .LBB0_117
+; X86-NEXT:  # %bb.116:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB0_117: # %udiv-bb1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    je .LBB0_119
+; X86-NEXT:  # %bb.118: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB0_119: # %udiv-bb1
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB0_121
+; X86-NEXT:  # %bb.120: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB0_121: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %al
+; X86-NEXT:    jb .LBB0_123
+; X86-NEXT:  # %bb.122: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB0_123: # %udiv-bb1
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    orl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    js .LBB0_124
+; X86-NEXT:  # %bb.125: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    jne .LBB0_126
+; X86-NEXT:  .LBB0_127: # %udiv-bb1
+; X86-NEXT:    jns .LBB0_129
+; X86-NEXT:  .LBB0_128: # %udiv-bb1
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:  .LBB0_129: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    je .LBB0_131
+; X86-NEXT:  # %bb.130: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB0_131: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    js .LBB0_132
+; X86-NEXT:  # %bb.133: # %udiv-bb1
+; X86-NEXT:    jne .LBB0_134
+; X86-NEXT:  .LBB0_135: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jns .LBB0_137
+; X86-NEXT:  .LBB0_136: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB0_137: # %udiv-bb1
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    je .LBB0_138
+; X86-NEXT:  # %bb.30: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB0_31
+; X86-NEXT:  # %bb.32: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB0_33
+; X86-NEXT:  .LBB0_27:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jmp .LBB0_140
+; X86-NEXT:  .LBB0_124: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    je .LBB0_127
+; X86-NEXT:  .LBB0_126: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    js .LBB0_128
+; X86-NEXT:    jmp .LBB0_129
+; X86-NEXT:  .LBB0_132: # %udiv-bb1
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB0_135
+; X86-NEXT:  .LBB0_134: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    js .LBB0_136
+; X86-NEXT:    jmp .LBB0_137
+; X86-NEXT:  .LBB0_138:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB0_29
+; X86-NEXT:  .LBB0_31:
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB0_33: # %udiv-preheader
+; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    subb $64, %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    jb .LBB0_35
+; X86-NEXT:  # %bb.34: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB0_35: # %udiv-preheader
+; X86-NEXT:    negb %al
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shldl %cl, %esi, %ebp
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    jne .LBB0_37
+; X86-NEXT:  # %bb.36: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB0_37: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %dl, %ah
+; X86-NEXT:    addb $-64, %ah
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movb %ah, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %ah
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB0_39
+; X86-NEXT:  # %bb.38: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB0_39: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %dl
+; X86-NEXT:    jb .LBB0_40
+; X86-NEXT:  # %bb.41: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB0_42
+; X86-NEXT:  .LBB0_40:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:  .LBB0_42: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB0_44
+; X86-NEXT:  # %bb.43: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_44: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shrdl %cl, %edx, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB0_46
+; X86-NEXT:  # %bb.45: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_46: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb %ah, %cl
+; X86-NEXT:    shrdl %cl, %edi, %ebx
+; X86-NEXT:    testb $32, %ah
+; X86-NEXT:    jne .LBB0_48
+; X86-NEXT:  # %bb.47: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:  .LBB0_48: # %udiv-preheader
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB0_50
+; X86-NEXT:  # %bb.49:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:  .LBB0_50: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB0_52
+; X86-NEXT:  # %bb.51: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB0_52: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shldl %cl, %edi, %ebp
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    jne .LBB0_54
+; X86-NEXT:  # %bb.53: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB0_54: # %udiv-preheader
+; X86-NEXT:    movb $-128, %ch
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jb .LBB0_56
+; X86-NEXT:  # %bb.55: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB0_56: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shldl %cl, %edi, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    je .LBB0_58
+; X86-NEXT:  # %bb.57:
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB0_58: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    jb .LBB0_60
+; X86-NEXT:  # %bb.59: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB0_60: # %udiv-preheader
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    shrdl %cl, %ebx, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB0_62
+; X86-NEXT:  # %bb.61: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB0_62: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    jb .LBB0_64
+; X86-NEXT:  # %bb.63: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB0_64: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    je .LBB0_66
+; X86-NEXT:  # %bb.65: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB0_66: # %udiv-preheader
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    je .LBB0_68
+; X86-NEXT:  # %bb.67: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB0_68: # %udiv-preheader
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    jne .LBB0_69
+; X86-NEXT:  # %bb.70: # %udiv-preheader
+; X86-NEXT:    js .LBB0_71
+; X86-NEXT:  .LBB0_72: # %udiv-preheader
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    je .LBB0_74
+; X86-NEXT:  .LBB0_73: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB0_74: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    addb $-128, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB0_76
+; X86-NEXT:  # %bb.75: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB0_76: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jb .LBB0_78
+; X86-NEXT:  # %bb.77: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB0_78: # %udiv-preheader
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    je .LBB0_80
+; X86-NEXT:  # %bb.79: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_80: # %udiv-preheader
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    jns .LBB0_82
+; X86-NEXT:  # %bb.81: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB0_82: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    je .LBB0_84
+; X86-NEXT:  # %bb.83: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_84: # %udiv-preheader
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    je .LBB0_86
+; X86-NEXT:  # %bb.85: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB0_86: # %udiv-preheader
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    jns .LBB0_88
+; X86-NEXT:  # %bb.87: # %udiv-preheader
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:  .LBB0_88: # %udiv-preheader
+; X86-NEXT:    je .LBB0_90
+; X86-NEXT:  # %bb.89: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_90: # %udiv-preheader
+; X86-NEXT:    orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    jns .LBB0_92
+; X86-NEXT:  # %bb.91: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB0_92: # %udiv-preheader
+; X86-NEXT:    je .LBB0_94
+; X86-NEXT:  # %bb.93: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB0_94: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    addl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $1, %edi
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB0_28: # %udiv-do-while
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %edi
+; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    leal (%ebx,%edx,2), %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %ebp, %esi
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ebp
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %ecx
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    addl %esi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    negl %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    subl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebp, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    addl $-1, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $1, %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    jne .LBB0_28
+; X86-NEXT:  .LBB0_29: # %udiv-loop-exit
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    shldl $1, %ebp, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %ebp
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    shrl $31, %ebx
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:  .LBB0_140: # %udiv-end
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ebp, 4(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movb %bl, 16(%eax)
+; X86-NEXT:    addl $88, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+; X86-NEXT:  .LBB0_69: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jns .LBB0_72
+; X86-NEXT:  .LBB0_71: # %udiv-preheader
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    jne .LBB0_73
+; X86-NEXT:    jmp .LBB0_74
+;
+; X64-LABEL: udiv129:
+; X64:       # %bb.0: # %_udiv-special-cases
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rsi, %r9
+; X64-NEXT:    movl %r14d, %ebp
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax def $rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    orq %r8, %rdx
+; X64-NEXT:    sete %r12b
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    orq %rbp, %rdx
+; X64-NEXT:    orq %rsi, %rdx
+; X64-NEXT:    sete %r15b
+; X64-NEXT:    bsrq %r8, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    bsrq %rcx, %rbx
+; X64-NEXT:    xorq $63, %rbx
+; X64-NEXT:    addq $64, %rbx
+; X64-NEXT:    testq %r8, %r8
+; X64-NEXT:    cmovneq %rdx, %rbx
+; X64-NEXT:    subq $-128, %rbx
+; X64-NEXT:    bsrq %rax, %r11
+; X64-NEXT:    xorq $63, %r11
+; X64-NEXT:    addq $64, %r11
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    cmoveq %rbx, %r11
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    subq $127, %r11
+; X64-NEXT:    movl $0, %r13d
+; X64-NEXT:    sbbq %r13, %r13
+; X64-NEXT:    movl $0, %ebx
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    movl $0, %r10d
+; X64-NEXT:    sbbq %r10, %r10
+; X64-NEXT:    bsrq %rsi, %rsi
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    bsrq %rdi, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %r9, %r9
+; X64-NEXT:    cmovneq %rsi, %rcx
+; X64-NEXT:    subq $-128, %rcx
+; X64-NEXT:    bsrq %rbp, %rsi
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    addq $64, %rsi
+; X64-NEXT:    testq %rbp, %rbp
+; X64-NEXT:    cmoveq %rcx, %rsi
+; X64-NEXT:    subq $127, %rsi
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    sbbq %rdx, %rdx
+; X64-NEXT:    subq %rsi, %r11
+; X64-NEXT:    sbbq %rcx, %r13
+; X64-NEXT:    sbbq %rbp, %rbx
+; X64-NEXT:    sbbq %rdx, %r10
+; X64-NEXT:    movl $128, %ecx
+; X64-NEXT:    cmpq %r11, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r13, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rbx, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r10, %rcx
+; X64-NEXT:    setb %dl
+; X64-NEXT:    orb %r15b, %dl
+; X64-NEXT:    orb %r12b, %dl
+; X64-NEXT:    movq %r14, %rcx
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    testb %dl, %dl
+; X64-NEXT:    movq %r9, %rdx
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    cmoveq %rdi, %rax
+; X64-NEXT:    jne .LBB0_6
+; X64-NEXT:  # %bb.1: # %_udiv-special-cases
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %r11, %rsi
+; X64-NEXT:    xorq $128, %rsi
+; X64-NEXT:    orq %rbx, %rsi
+; X64-NEXT:    orq %r13, %rsi
+; X64-NEXT:    je .LBB0_6
+; X64-NEXT:  # %bb.2: # %udiv-bb1
+; X64-NEXT:    movq %r9, %r10
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %r11d, %r12d
+; X64-NEXT:    movb $-128, %r8b
+; X64-NEXT:    subb %r11b, %r8b
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    xorl %r15d, %r15d
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    cmovneq %r15, %rax
+; X64-NEXT:    cmovneq %r15, %rdx
+; X64-NEXT:    movq %r9, %rsi
+; X64-NEXT:    shldq %cl, %rdi, %rsi
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %rbp, %rsi
+; X64-NEXT:    addq $1, %r11
+; X64-NEXT:    adcq $0, %r13
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %rdi, %r9
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shrdq %cl, %r10, %r9
+; X64-NEXT:    movq %r10, %rbp
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmoveq %r9, %rbp
+; X64-NEXT:    orl %ebp, %edx
+; X64-NEXT:    negb %r12b
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq %r15, %rbp
+; X64-NEXT:    testb %r8b, %r8b
+; X64-NEXT:    cmovsq %r15, %rsi
+; X64-NEXT:    cmovsq %rbp, %rdx
+; X64-NEXT:    cmoveq %r14, %rdx
+; X64-NEXT:    cmovsq %r15, %rax
+; X64-NEXT:    movq %r11, %rcx
+; X64-NEXT:    orq %rbx, %rcx
+; X64-NEXT:    orq %r13, %rcx
+; X64-NEXT:    je .LBB0_7
+; X64-NEXT:  # %bb.3: # %udiv-preheader
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    movl %r11d, %ecx
+; X64-NEXT:    shrdq %cl, %r10, %r8
+; X64-NEXT:    movq %r10, %r15
+; X64-NEXT:    shrq %cl, %r10
+; X64-NEXT:    xorl %r12d, %r12d
+; X64-NEXT:    testb $64, %r11b
+; X64-NEXT:    cmovneq %r10, %r8
+; X64-NEXT:    cmovneq %r12, %r10
+; X64-NEXT:    andl $1, %r14d
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r11b, %cl
+; X64-NEXT:    xorl %r9d, %r9d
+; X64-NEXT:    shldq %cl, %r14, %r9
+; X64-NEXT:    movq %r14, %rbp
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %r9
+; X64-NEXT:    cmovneq %r12, %rbp
+; X64-NEXT:    orq %r8, %rbp
+; X64-NEXT:    orq %r10, %r9
+; X64-NEXT:    leal -128(%r11), %ecx
+; X64-NEXT:    shrdq %cl, %r12, %r14
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r12, %r14
+; X64-NEXT:    testb %r11b, %r11b
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovsq %r12, %r9
+; X64-NEXT:    cmoveq %r15, %r9
+; X64-NEXT:    cmovnsq %rbp, %r14
+; X64-NEXT:    cmoveq %rdi, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    addq $-1, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    adcq $1, %r12
+; X64-NEXT:    andl $1, %r12d
+; X64-NEXT:    xorl %r15d, %r15d
+; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    movq %r9, %r10
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB0_4: # %udiv-do-while
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    shldq $1, %r14, %r10
+; X64-NEXT:    shrq $63, %r9
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    leaq (%rdx,%r14,2), %r14
+; X64-NEXT:    shldq $1, %rax, %rdi
+; X64-NEXT:    orq %r8, %rdi
+; X64-NEXT:    shrq $63, %rsi
+; X64-NEXT:    addq %rax, %rax
+; X64-NEXT:    orq %r15, %rax
+; X64-NEXT:    orl %esi, %ebp
+; X64-NEXT:    movl %ebp, %edx
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    cmpq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    sbbq %r10, %rsi
+; X64-NEXT:    movq %r12, %rsi
+; X64-NEXT:    sbbq %r9, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    negq %rsi
+; X64-NEXT:    movl %esi, %r15d
+; X64-NEXT:    andl $1, %r15d
+; X64-NEXT:    movq %rsi, %rbp
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    subq %rsi, %r14
+; X64-NEXT:    sbbq %rbp, %r10
+; X64-NEXT:    addq $-1, %r11
+; X64-NEXT:    adcq $-1, %r13
+; X64-NEXT:    adcq $1, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %r11, %rsi
+; X64-NEXT:    orq %rbx, %rsi
+; X64-NEXT:    orq %r13, %rsi
+; X64-NEXT:    movl $0, %r8d
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    movq %r10, %r9
+; X64-NEXT:    movq %rdi, %rsi
+; X64-NEXT:    jne .LBB0_4
+; X64-NEXT:    jmp .LBB0_5
+; X64-NEXT:  .LBB0_7:
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:  .LBB0_5: # %udiv-loop-exit
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    shrq $63, %rdi
+; X64-NEXT:    addq %rax, %rax
+; X64-NEXT:    orq %r15, %rax
+; X64-NEXT:    orl %edi, %ecx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:  .LBB0_6: # %udiv-end
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %res = udiv i129 %a, %b
+  ret i129 %res
+}
+
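+; urem129 goes through the same shift-subtract division expansion as udiv129;
+; the remainder is then formed as a - (a / b) * b, which appears as the
+; multiply/subtract sequence after the %udiv-end block in the checks below.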
+define i129 @urem129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: urem129:
+; X86:       # %bb.0: # %_udiv-special-cases
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $84, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    sete %al
+; X86-NEXT:    testl %ebp, %ebp
+; X86-NEXT:    jne .LBB1_1
+; X86-NEXT:  # %bb.2: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ebx, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:    jmp .LBB1_3
+; X86-NEXT:  .LBB1_1:
+; X86-NEXT:    bsrl %ebp, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:  .LBB1_3: # %_udiv-special-cases
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB1_4
+; X86-NEXT:  # %bb.5: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    jmp .LBB1_6
+; X86-NEXT:  .LBB1_4:
+; X86-NEXT:    bsrl %esi, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:  .LBB1_6: # %_udiv-special-cases
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    jne .LBB1_8
+; X86-NEXT:  # %bb.7: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB1_8: # %_udiv-special-cases
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    jne .LBB1_9
+; X86-NEXT:  # %bb.10: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %ebx
+; X86-NEXT:    jmp .LBB1_11
+; X86-NEXT:  .LBB1_9:
+; X86-NEXT:    bsrl %edx, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $96, %ebx
+; X86-NEXT:  .LBB1_11: # %_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl $127, %ebx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl %ebx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB1_12
+; X86-NEXT:  # %bb.13: # %_udiv-special-cases
+; X86-NEXT:    bsrl %eax, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    jmp .LBB1_14
+; X86-NEXT:  .LBB1_12:
+; X86-NEXT:    bsrl %edi, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:  .LBB1_14: # %_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB1_15
+; X86-NEXT:  # %bb.16: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:    jmp .LBB1_17
+; X86-NEXT:  .LBB1_15:
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    bsrl %edi, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:  .LBB1_17: # %_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    jne .LBB1_19
+; X86-NEXT:  # %bb.18: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebp
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB1_19: # %_udiv-special-cases
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:    orb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB1_20
+; X86-NEXT:  # %bb.21: # %_udiv-special-cases
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    subl $-128, %ecx
+; X86-NEXT:    jmp .LBB1_22
+; X86-NEXT:  .LBB1_20:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    bsrl %eax, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    addl $96, %ecx
+; X86-NEXT:  .LBB1_22: # %_udiv-special-cases
+; X86-NEXT:    subl $127, %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    subl %ecx, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl $128, %eax
+; X86-NEXT:    cmpl (%esp), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    setb %al
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    orb %al, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB1_24
+; X86-NEXT:  # %bb.23: # %_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:  .LBB1_24: # %_udiv-special-cases
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    jne .LBB1_26
+; X86-NEXT:  # %bb.25: # %_udiv-special-cases
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:  .LBB1_26: # %_udiv-special-cases
+; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB1_74
+; X86-NEXT:  # %bb.27: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    xorl $128, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    je .LBB1_74
+; X86-NEXT:  # %bb.28: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB1_30
+; X86-NEXT:  # %bb.29: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB1_30: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $-128, %cl
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    shldl %cl, %esi, %ebp
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB1_32
+; X86-NEXT:  # %bb.31:
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB1_32: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    subb $64, %ch
+; X86-NEXT:    jb .LBB1_34
+; X86-NEXT:  # %bb.33: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB1_34: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB1_36
+; X86-NEXT:  # %bb.35: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_36: # %udiv-bb1
+; X86-NEXT:    negb %ch
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB1_38
+; X86-NEXT:  # %bb.37: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB1_38: # %udiv-bb1
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB1_40
+; X86-NEXT:  # %bb.39:
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_40: # %udiv-bb1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB1_42
+; X86-NEXT:  # %bb.41: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB1_42: # %udiv-bb1
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB1_44
+; X86-NEXT:  # %bb.43:
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_44: # %udiv-bb1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    shrdl %cl, %esi, %ebx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB1_46
+; X86-NEXT:  # %bb.45: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB1_46: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    shrdl %cl, %ebx, %ebp
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB1_48
+; X86-NEXT:  # %bb.47: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB1_48: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jae .LBB1_50
+; X86-NEXT:  # %bb.49:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB1_50: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    addl $1, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    je .LBB1_52
+; X86-NEXT:  # %bb.51: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB1_52: # %udiv-bb1
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB1_54
+; X86-NEXT:  # %bb.53: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB1_54: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jb .LBB1_56
+; X86-NEXT:  # %bb.55: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB1_56: # %udiv-bb1
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    jns .LBB1_58
+; X86-NEXT:  # %bb.57: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB1_58: # %udiv-bb1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB1_60
+; X86-NEXT:  # %bb.59: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_60: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jns .LBB1_62
+; X86-NEXT:  # %bb.61: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB1_62: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    jne .LBB1_63
+; X86-NEXT:  # %bb.64: # %udiv-bb1
+; X86-NEXT:    js .LBB1_65
+; X86-NEXT:  .LBB1_66: # %udiv-bb1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    jne .LBB1_67
+; X86-NEXT:  .LBB1_68: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jns .LBB1_70
+; X86-NEXT:  .LBB1_69: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB1_70: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    je .LBB1_71
+; X86-NEXT:  # %bb.75: # %udiv-preheader
+; X86-NEXT:    shrdl %cl, %ebp, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB1_76
+; X86-NEXT:  # %bb.77: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB1_78
+; X86-NEXT:  .LBB1_63: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jns .LBB1_66
+; X86-NEXT:  .LBB1_65: # %udiv-bb1
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    je .LBB1_68
+; X86-NEXT:  .LBB1_67: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    js .LBB1_69
+; X86-NEXT:    jmp .LBB1_70
+; X86-NEXT:  .LBB1_71:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB1_73
+; X86-NEXT:  .LBB1_76:
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB1_78: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    subb $64, %dl
+; X86-NEXT:    jb .LBB1_80
+; X86-NEXT:  # %bb.79: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB1_80: # %udiv-preheader
+; X86-NEXT:    negb %dl
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    jne .LBB1_82
+; X86-NEXT:  # %bb.81: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB1_82: # %udiv-preheader
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movb %al, %dh
+; X86-NEXT:    addb $-64, %dh
+; X86-NEXT:    movb %dh, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %dh
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB1_84
+; X86-NEXT:  # %bb.83: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB1_84: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    jb .LBB1_85
+; X86-NEXT:  # %bb.86: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB1_87
+; X86-NEXT:  .LBB1_85:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_87: # %udiv-preheader
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB1_89
+; X86-NEXT:  # %bb.88: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB1_89: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB1_91
+; X86-NEXT:  # %bb.90: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB1_91: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movb %dh, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    shrdl %cl, %ebx, %edi
+; X86-NEXT:    testb $32, %dh
+; X86-NEXT:    jne .LBB1_93
+; X86-NEXT:  # %bb.92: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_93: # %udiv-preheader
+; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT:    cmpb $64, %bl
+; X86-NEXT:    jae .LBB1_95
+; X86-NEXT:  # %bb.94:
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_95: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB1_97
+; X86-NEXT:  # %bb.96: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:  .LBB1_97: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    jne .LBB1_99
+; X86-NEXT:  # %bb.98: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB1_99: # %udiv-preheader
+; X86-NEXT:    movb $-128, %ch
+; X86-NEXT:    subb %bl, %ch
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jb .LBB1_101
+; X86-NEXT:  # %bb.100: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB1_101: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shldl %cl, %edi, %esi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    je .LBB1_103
+; X86-NEXT:  # %bb.102:
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB1_103: # %udiv-preheader
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    jb .LBB1_105
+; X86-NEXT:  # %bb.104: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB1_105: # %udiv-preheader
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shrdl %cl, %edx, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB1_107
+; X86-NEXT:  # %bb.106: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB1_107: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    jb .LBB1_109
+; X86-NEXT:  # %bb.108: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB1_109: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    testb %dl, %dl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB1_111
+; X86-NEXT:  # %bb.110: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB1_111: # %udiv-preheader
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    je .LBB1_113
+; X86-NEXT:  # %bb.112: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB1_113: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    testb %dl, %dl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    jne .LBB1_114
+; X86-NEXT:  # %bb.115: # %udiv-preheader
+; X86-NEXT:    js .LBB1_116
+; X86-NEXT:  .LBB1_117: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    je .LBB1_119
+; X86-NEXT:  .LBB1_118: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB1_119: # %udiv-preheader
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    movb %dl, %cl
+; X86-NEXT:    addb $-128, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    je .LBB1_120
+; X86-NEXT:  # %bb.121: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jae .LBB1_122
+; X86-NEXT:  .LBB1_123: # %udiv-preheader
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    je .LBB1_125
+; X86-NEXT:  .LBB1_124: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_125: # %udiv-preheader
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    jns .LBB1_127
+; X86-NEXT:  # %bb.126: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB1_127: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB1_129
+; X86-NEXT:  # %bb.128: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB1_129: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    je .LBB1_131
+; X86-NEXT:  # %bb.130: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB1_131: # %udiv-preheader
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    jns .LBB1_133
+; X86-NEXT:  # %bb.132: # %udiv-preheader
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:  .LBB1_133: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    je .LBB1_135
+; X86-NEXT:  # %bb.134: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB1_135: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    jns .LBB1_137
+; X86-NEXT:  # %bb.136: # %udiv-preheader
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:  .LBB1_137: # %udiv-preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    je .LBB1_139
+; X86-NEXT:  # %bb.138: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB1_139: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    addl $-1, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    adcl $1, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB1_72: # %udiv-do-while
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ebp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    leal (%edi,%esi,2), %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %edx
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    addl %ecx, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    cmpl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %ebp, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %ecx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    addl $-1, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $1, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB1_72
+; X86-NEXT:  .LBB1_73: # %udiv-loop-exit
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %ecx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %edx
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    addl %edi, %edi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB1_74: # %udiv-end
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    setb %cl
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    imull %ebp, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edx, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    adcl %ebp, %edx
+; X86-NEXT:    imull %ecx, %esi
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl %ebx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    sbbl %eax, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ebp, 12(%eax)
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movb %bl, 16(%eax)
+; X86-NEXT:    addl $84, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+; X86-NEXT:  .LBB1_114: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jns .LBB1_117
+; X86-NEXT:  .LBB1_116: # %udiv-preheader
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    jne .LBB1_118
+; X86-NEXT:    jmp .LBB1_119
+; X86-NEXT:  .LBB1_120: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jb .LBB1_123
+; X86-NEXT:  .LBB1_122: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    jne .LBB1_124
+; X86-NEXT:    jmp .LBB1_125
+;
+; X64-LABEL: urem129:
+; X64:       # %bb.0: # %_udiv-special-cases
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %r8, %r13
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movl %r14d, %r12d
+; X64-NEXT:    andl $1, %r12d
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %r9d, %ebp
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    orq %rbp, %rax
+; X64-NEXT:    orq %r8, %rax
+; X64-NEXT:    sete %r8b
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    orq %r12, %rax
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    sete %r15b
+; X64-NEXT:    bsrq %r13, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    bsrq %rcx, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    addq $64, %rdx
+; X64-NEXT:    testq %r13, %r13
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    subq $-128, %rdx
+; X64-NEXT:    bsrq %rbp, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    addq $64, %rax
+; X64-NEXT:    testq %rbp, %rbp
+; X64-NEXT:    cmoveq %rdx, %rax
+; X64-NEXT:    xorl %r10d, %r10d
+; X64-NEXT:    subq $127, %rax
+; X64-NEXT:    movl $0, %r9d
+; X64-NEXT:    sbbq %r9, %r9
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    movl $0, %r11d
+; X64-NEXT:    sbbq %r11, %r11
+; X64-NEXT:    bsrq %rsi, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    bsrq %rdi, %rbx
+; X64-NEXT:    xorq $63, %rbx
+; X64-NEXT:    addq $64, %rbx
+; X64-NEXT:    testq %rsi, %rsi
+; X64-NEXT:    cmovneq %rdx, %rbx
+; X64-NEXT:    subq $-128, %rbx
+; X64-NEXT:    bsrq %r12, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    addq $64, %rdx
+; X64-NEXT:    testq %r12, %r12
+; X64-NEXT:    cmoveq %rbx, %rdx
+; X64-NEXT:    subq $127, %rdx
+; X64-NEXT:    movl $0, %ebx
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movl $0, %r12d
+; X64-NEXT:    sbbq %r12, %r12
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rbx, %r9
+; X64-NEXT:    sbbq %rcx, %rbp
+; X64-NEXT:    sbbq %r12, %r11
+; X64-NEXT:    movq %rsi, %r12
+; X64-NEXT:    movl $128, %ecx
+; X64-NEXT:    cmpq %rax, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r9, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rbp, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r11, %rcx
+; X64-NEXT:    movq %rdi, %r11
+; X64-NEXT:    setb %cl
+; X64-NEXT:    orb %r15b, %cl
+; X64-NEXT:    orb %r8b, %cl
+; X64-NEXT:    movq %r14, %rbx
+; X64-NEXT:    cmovneq %r10, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    testb %cl, %cl
+; X64-NEXT:    movq %rsi, %r15
+; X64-NEXT:    cmovneq %r10, %r15
+; X64-NEXT:    cmoveq %rdi, %r10
+; X64-NEXT:    jne .LBB1_6
+; X64-NEXT:  # %bb.1: # %_udiv-special-cases
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    xorq $128, %rcx
+; X64-NEXT:    orq %rbp, %rcx
+; X64-NEXT:    orq %r9, %rcx
+; X64-NEXT:    je .LBB1_6
+; X64-NEXT:  # %bb.2: # %udiv-bb1
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    movb $-128, %r8b
+; X64-NEXT:    subb %al, %r8b
+; X64-NEXT:    movq %r14, %r13
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shlq %cl, %r13
+; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    movq %r12, %rsi
+; X64-NEXT:    xorl %r12d, %r12d
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    cmovneq %r12, %r10
+; X64-NEXT:    cmovneq %r12, %r13
+; X64-NEXT:    movq %rsi, %r15
+; X64-NEXT:    shldq %cl, %r11, %r15
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %rdi, %r15
+; X64-NEXT:    addq $1, %rax
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    shrdq %cl, %rsi, %rdi
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    shrq %cl, %rbx
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    cmoveq %rdi, %rbx
+; X64-NEXT:    orl %ebx, %r13d
+; X64-NEXT:    negb %dl
+; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    cmovneq %r12, %rdi
+; X64-NEXT:    testb %r8b, %r8b
+; X64-NEXT:    cmovsq %r12, %r15
+; X64-NEXT:    cmovsq %rdi, %r13
+; X64-NEXT:    cmoveq %r14, %r13
+; X64-NEXT:    cmovsq %r12, %r10
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    orq %rbp, %rcx
+; X64-NEXT:    orq %r9, %rcx
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    je .LBB1_7
+; X64-NEXT:  # %bb.3: # %udiv-preheader
+; X64-NEXT:    andl $1, %r13d
+; X64-NEXT:    movq %r11, %r8
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    shrdq %cl, %rsi, %r8
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    shrq %cl, %rbx
+; X64-NEXT:    xorl %r12d, %r12d
+; X64-NEXT:    testb $64, %al
+; X64-NEXT:    cmovneq %rbx, %r8
+; X64-NEXT:    cmovneq %r12, %rbx
+; X64-NEXT:    movl %r14d, %edx
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %al, %cl
+; X64-NEXT:    xorl %r14d, %r14d
+; X64-NEXT:    shldq %cl, %rdx, %r14
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %r14
+; X64-NEXT:    cmovneq %r12, %rdi
+; X64-NEXT:    orq %r8, %rdi
+; X64-NEXT:    orq %rbx, %r14
+; X64-NEXT:    leal -128(%rax), %ecx
+; X64-NEXT:    shrdq %cl, %r12, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r12, %rdx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovsq %r12, %r14
+; X64-NEXT:    cmoveq %rsi, %r14
+; X64-NEXT:    cmovnsq %rdi, %rdx
+; X64-NEXT:    cmoveq %r11, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    addq $-1, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    adcq $1, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    xorl %r12d, %r12d
+; X64-NEXT:    xorl %ebx, %ebx
+; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    movq %r14, %r11
+; X64-NEXT:    movq %r15, %rcx
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB1_4: # %udiv-do-while
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    shldq $1, %rdx, %r11
+; X64-NEXT:    shrq $63, %r14
+; X64-NEXT:    andl $1, %r13d
+; X64-NEXT:    leaq (%r13,%rdx,2), %rdx
+; X64-NEXT:    shldq $1, %r10, %rcx
+; X64-NEXT:    orq %rbx, %rcx
+; X64-NEXT:    shrq $63, %r15
+; X64-NEXT:    addq %r10, %r10
+; X64-NEXT:    orq %r12, %r10
+; X64-NEXT:    orl %r15d, %r8d
+; X64-NEXT:    movl %r8d, %r13d
+; X64-NEXT:    andl $1, %r13d
+; X64-NEXT:    cmpq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    sbbq %r11, %rbx
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    sbbq %r14, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    negq %rbx
+; X64-NEXT:    movl %ebx, %r12d
+; X64-NEXT:    andl $1, %r12d
+; X64-NEXT:    movq %rbx, %rdi
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    subq %rbx, %rdx
+; X64-NEXT:    sbbq %rdi, %r11
+; X64-NEXT:    addq $-1, %rax
+; X64-NEXT:    adcq $-1, %r9
+; X64-NEXT:    adcq $1, %rbp
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    orq %rbp, %rdi
+; X64-NEXT:    orq %r9, %rdi
+; X64-NEXT:    movl $0, %ebx
+; X64-NEXT:    movl $0, %r8d
+; X64-NEXT:    movq %r11, %r14
+; X64-NEXT:    movq %rcx, %r15
+; X64-NEXT:    jne .LBB1_4
+; X64-NEXT:    jmp .LBB1_5
+; X64-NEXT:  .LBB1_7:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r15, %rcx
+; X64-NEXT:  .LBB1_5: # %udiv-loop-exit
+; X64-NEXT:    movq %rcx, %r15
+; X64-NEXT:    shldq $1, %r10, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    orq %rbx, %r15
+; X64-NEXT:    shrq $63, %rcx
+; X64-NEXT:    addq %r10, %r10
+; X64-NEXT:    orq %r12, %r10
+; X64-NEXT:    orl %ecx, %ebx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:  .LBB1_6: # %udiv-end
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rcx, %rdi
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    adcq %rbp, %rdx
+; X64-NEXT:    imulq %r15, %r13
+; X64-NEXT:    addq %rdx, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    imulq %r10, %rcx
+; X64-NEXT:    imulq %rsi, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    addq %r13, %rbx
+; X64-NEXT:    subq %r8, %r11
+; X64-NEXT:    sbbq %rax, %r12
+; X64-NEXT:    sbbq %rbx, %r14
+; X64-NEXT:    andl $1, %r14d
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r12, %rdx
+; X64-NEXT:    movq %r14, %rcx
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %res = urem i129 %a, %b
+  ret i129 %res
+}
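+
+; The i129 urem above is wider than the 128-bit div/rem limit X86 reports
+; through the new maxLegalDivRemBitWidth() hook, so ExpandLargeDivRem rewrites
+; it in IR before selection; the block labels in the checks
+; (_udiv-special-cases, udiv-bb1, udiv-preheader, udiv-do-while,
+; udiv-loop-exit, udiv-end) come from that expansion. One iteration of the
+; shift-and-subtract loop, sketched in IR (names illustrative, not taken from
+; this test: %r0 is the initial partial remainder, %hibit the dividend bit
+; shifted in on this step, %b the divisor; the emitted code uses a branchless
+; mask form instead, visible above as the and/neg/sub sequences):
+;
+;   udiv-do-while:
+;     %q = phi i129 [ 0, %udiv-preheader ], [ %q.next, %udiv-do-while ]
+;     %r = phi i129 [ %r0, %udiv-preheader ], [ %r.next, %udiv-do-while ]
+;     %r.shl  = shl i129 %r, 1               ; make room for the next bit
+;     %r.in   = or i129 %r.shl, %hibit       ; shift in the next dividend bit
+;     %cmp    = icmp uge i129 %r.in, %b      ; does the divisor fit?
+;     %sub    = select i1 %cmp, i129 %b, i129 0
+;     %r.next = sub i129 %r.in, %sub         ; conditionally subtract it
+;     %q.shl  = shl i129 %q, 1
+;     %bit    = zext i1 %cmp to i129
+;     %q.next = or i129 %q.shl, %bit         ; record the new quotient bit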
+
+define i129 @sdiv129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: sdiv129:
+; X86:       # %bb.0: # %_udiv-special-cases
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $116, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    negl %esi
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    xorl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    xorl %esi, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    xorl %esi, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    xorl %esi, %ebx
+; X86-NEXT:    subl %esi, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %ebp
+; X86-NEXT:    sbbl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    xorl %edx, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl %edx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    xorl %edx, %edi
+; X86-NEXT:    subl %edx, %edi
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sbbl %edx, %ebx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    sete %al
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB2_1
+; X86-NEXT:  # %bb.2: # %_udiv-special-cases
+; X86-NEXT:    bsrl %esi, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    jmp .LBB2_3
+; X86-NEXT:  .LBB2_1:
+; X86-NEXT:    bsrl %ebx, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:  .LBB2_3: # %_udiv-special-cases
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB2_4
+; X86-NEXT:  # %bb.5: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    jmp .LBB2_6
+; X86-NEXT:  .LBB2_4:
+; X86-NEXT:    bsrl %eax, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:  .LBB2_6: # %_udiv-special-cases
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    jne .LBB2_8
+; X86-NEXT:  # %bb.7: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB2_8: # %_udiv-special-cases
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    jne .LBB2_9
+; X86-NEXT:  # %bb.10: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %eax
+; X86-NEXT:    jmp .LBB2_11
+; X86-NEXT:  .LBB2_9:
+; X86-NEXT:    bsrl %esi, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $96, %eax
+; X86-NEXT:  .LBB2_11: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    subl $127, %eax
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    jne .LBB2_12
+; X86-NEXT:  # %bb.13: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB2_14
+; X86-NEXT:  .LBB2_12:
+; X86-NEXT:    bsrl %edi, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB2_14: # %_udiv-special-cases
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB2_15
+; X86-NEXT:  # %bb.16: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    jmp .LBB2_17
+; X86-NEXT:  .LBB2_15:
+; X86-NEXT:    bsrl %edx, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:  .LBB2_17: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    jne .LBB2_19
+; X86-NEXT:  # %bb.18: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB2_19: # %_udiv-special-cases
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Reload
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB2_20
+; X86-NEXT:  # %bb.21: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %eax
+; X86-NEXT:    jmp .LBB2_22
+; X86-NEXT:  .LBB2_20:
+; X86-NEXT:    bsrl %edx, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $96, %eax
+; X86-NEXT:  .LBB2_22: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    subl $127, %eax
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl $128, %eax
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    cmpl %ecx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    setb %al
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; X86-NEXT:    orb %al, %dl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB2_24
+; X86-NEXT:  # %bb.23: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB2_24: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %dl, %dl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB2_26
+; X86-NEXT:  # %bb.25: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB2_26: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB2_27
+; X86-NEXT:  # %bb.140: # %_udiv-special-cases
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    xorl $128, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB2_141
+; X86-NEXT:  # %bb.96: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movb %al, %ch
+; X86-NEXT:    subb $64, %ch
+; X86-NEXT:    negb %ch
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB2_98
+; X86-NEXT:  # %bb.97: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB2_98: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $-128, %cl
+; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %ebp
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB2_100
+; X86-NEXT:  # %bb.99:
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB2_100: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jb .LBB2_102
+; X86-NEXT:  # %bb.101: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB2_102: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB2_104
+; X86-NEXT:  # %bb.103: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_104: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB2_106
+; X86-NEXT:  # %bb.105: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB2_106: # %udiv-bb1
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jae .LBB2_108
+; X86-NEXT:  # %bb.107:
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_108: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB2_110
+; X86-NEXT:  # %bb.109: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB2_110: # %udiv-bb1
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB2_112
+; X86-NEXT:  # %bb.111:
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_112: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB2_114
+; X86-NEXT:  # %bb.113: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB2_114: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edi
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB2_116
+; X86-NEXT:  # %bb.115: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB2_116: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    jae .LBB2_118
+; X86-NEXT:  # %bb.117:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB2_118: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    addl $1, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    je .LBB2_120
+; X86-NEXT:  # %bb.119: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB2_120: # %udiv-bb1
+; X86-NEXT:    negb %ch
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB2_122
+; X86-NEXT:  # %bb.121: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB2_122: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB2_124
+; X86-NEXT:  # %bb.123: # %udiv-bb1
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB2_124: # %udiv-bb1
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    js .LBB2_125
+; X86-NEXT:  # %bb.126: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB2_127
+; X86-NEXT:  .LBB2_128: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    js .LBB2_129
+; X86-NEXT:  .LBB2_130: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB2_131
+; X86-NEXT:  .LBB2_132: # %udiv-bb1
+; X86-NEXT:    js .LBB2_133
+; X86-NEXT:  .LBB2_134: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB2_136
+; X86-NEXT:  .LBB2_135: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB2_136: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    jns .LBB2_138
+; X86-NEXT:  # %bb.137: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB2_138: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    je .LBB2_139
+; X86-NEXT:  # %bb.30: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB2_31
+; X86-NEXT:  # %bb.32: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB2_33
+; X86-NEXT:  .LBB2_27:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jmp .LBB2_141
+; X86-NEXT:  .LBB2_125: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB2_128
+; X86-NEXT:  .LBB2_127: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    jns .LBB2_130
+; X86-NEXT:  .LBB2_129: # %udiv-bb1
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB2_132
+; X86-NEXT:  .LBB2_131: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jns .LBB2_134
+; X86-NEXT:  .LBB2_133: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB2_135
+; X86-NEXT:    jmp .LBB2_136
+; X86-NEXT:  .LBB2_139:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB2_29
+; X86-NEXT:  .LBB2_31:
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB2_33: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    subb $64, %bl
+; X86-NEXT:    jb .LBB2_35
+; X86-NEXT:  # %bb.34: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB2_35: # %udiv-preheader
+; X86-NEXT:    negb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shldl %cl, %ebp, %edi
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    jne .LBB2_37
+; X86-NEXT:  # %bb.36: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB2_37: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movb %al, %bh
+; X86-NEXT:    addb $-64, %bh
+; X86-NEXT:    movb %bh, %cl
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %bh
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB2_39
+; X86-NEXT:  # %bb.38: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB2_39: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpb $64, %al
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    jb .LBB2_40
+; X86-NEXT:  # %bb.41: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB2_42
+; X86-NEXT:  .LBB2_40:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_42: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB2_44
+; X86-NEXT:  # %bb.43: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB2_44: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    jne .LBB2_46
+; X86-NEXT:  # %bb.45: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_46: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb %bh, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edi
+; X86-NEXT:    testb $32, %bh
+; X86-NEXT:    jne .LBB2_48
+; X86-NEXT:  # %bb.47: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_48: # %udiv-preheader
+; X86-NEXT:    cmpb $64, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB2_50
+; X86-NEXT:  # %bb.49:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_50: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB2_52
+; X86-NEXT:  # %bb.51: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB2_52: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shldl %cl, %eax, %edi
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB2_54
+; X86-NEXT:  # %bb.53: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:  .LBB2_54: # %udiv-preheader
+; X86-NEXT:    movb $-128, %ch
+; X86-NEXT:    subb (%esp), %ch # 1-byte Folded Reload
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB2_56
+; X86-NEXT:  # %bb.55: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB2_56: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %ebx
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB2_57
+; X86-NEXT:  # %bb.58: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB2_59
+; X86-NEXT:  .LBB2_57:
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB2_59: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    jb .LBB2_61
+; X86-NEXT:  # %bb.60: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB2_61: # %udiv-preheader
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB2_63
+; X86-NEXT:  # %bb.62: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_63: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    jb .LBB2_65
+; X86-NEXT:  # %bb.64: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_65: # %udiv-preheader
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB2_67
+; X86-NEXT:  # %bb.66: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB2_67: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    je .LBB2_69
+; X86-NEXT:  # %bb.68: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB2_69: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB2_70
+; X86-NEXT:  # %bb.71: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    js .LBB2_72
+; X86-NEXT:  .LBB2_73: # %udiv-preheader
+; X86-NEXT:    je .LBB2_75
+; X86-NEXT:  .LBB2_74: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB2_75: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    addb $-128, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    je .LBB2_76
+; X86-NEXT:  # %bb.77: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jae .LBB2_78
+; X86-NEXT:  .LBB2_79: # %udiv-preheader
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    je .LBB2_81
+; X86-NEXT:  .LBB2_80: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_81: # %udiv-preheader
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    jns .LBB2_83
+; X86-NEXT:  # %bb.82: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB2_83: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB2_85
+; X86-NEXT:  # %bb.84: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_85: # %udiv-preheader
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    je .LBB2_87
+; X86-NEXT:  # %bb.86: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB2_87: # %udiv-preheader
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jns .LBB2_89
+; X86-NEXT:  # %bb.88: # %udiv-preheader
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:  .LBB2_89: # %udiv-preheader
+; X86-NEXT:    je .LBB2_91
+; X86-NEXT:  # %bb.90: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB2_91: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    jns .LBB2_93
+; X86-NEXT:  # %bb.92: # %udiv-preheader
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:  .LBB2_93: # %udiv-preheader
+; X86-NEXT:    je .LBB2_95
+; X86-NEXT:  # %bb.94: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB2_95: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $1, %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB2_28: # %udiv-do-while
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %ebx
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    andl $1, %ebp
+; X86-NEXT:    leal (%ebp,%ecx,2), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %edi
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %edx
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    addl %ebp, %ebp
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    andl $1, %ebp
+; X86-NEXT:    cmpl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    subl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    addl $-1, %esi
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edx
+; X86-NEXT:    adcl $1, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    jne .LBB2_28
+; X86-NEXT:  .LBB2_29: # %udiv-loop-exit
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %ebp
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ebx
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    shrl $31, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    addl %esi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB2_141: # %udiv-end
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    xorl %edi, %edx
+; X86-NEXT:    xorl %edi, %ebp
+; X86-NEXT:    xorl %edi, %ebx
+; X86-NEXT:    xorl %edi, %esi
+; X86-NEXT:    subl %edi, %esi
+; X86-NEXT:    sbbl %edi, %ebx
+; X86-NEXT:    sbbl %edi, %ebp
+; X86-NEXT:    sbbl %edi, %edx
+; X86-NEXT:    sbbl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %esi, (%eax)
+; X86-NEXT:    movl %ebx, 4(%eax)
+; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl %edx, 12(%eax)
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movb %cl, 16(%eax)
+; X86-NEXT:    addl $116, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+; X86-NEXT:  .LBB2_70: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jns .LBB2_73
+; X86-NEXT:  .LBB2_72: # %udiv-preheader
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB2_74
+; X86-NEXT:    jmp .LBB2_75
+; X86-NEXT:  .LBB2_76: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jb .LBB2_79
+; X86-NEXT:  .LBB2_78: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    jne .LBB2_80
+; X86-NEXT:    jmp .LBB2_81
+;
+; X64-LABEL: sdiv129:
+; X64:       # %bb.0: # %_udiv-special-cases
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %r8, %rbx
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rsi, %r12
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    movl %r9d, %r11d
+; X64-NEXT:    andl $1, %r11d
+; X64-NEXT:    negq %r11
+; X64-NEXT:    movl %r8d, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    xorq %rax, %r8
+; X64-NEXT:    xorq %rax, %r12
+; X64-NEXT:    xorq %rax, %r10
+; X64-NEXT:    subq %rax, %r10
+; X64-NEXT:    sbbq %rax, %r12
+; X64-NEXT:    sbbq %rax, %r8
+; X64-NEXT:    movl %r8d, %r14d
+; X64-NEXT:    andl $1, %r14d
+; X64-NEXT:    xorq %r11, %r9
+; X64-NEXT:    xorq %r11, %rbx
+; X64-NEXT:    xorq %r11, %rcx
+; X64-NEXT:    subq %r11, %rcx
+; X64-NEXT:    sbbq %r11, %rbx
+; X64-NEXT:    sbbq %r11, %r9
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    movq %r11, %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorl %eax, %r11d
+; X64-NEXT:    andl $1, %r11d
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    orq %r9, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    sete {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    orq %r14, %rax
+; X64-NEXT:    orq %r12, %rax
+; X64-NEXT:    sete {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    bsrq %rbx, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    bsrq %rcx, %rsi
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    addq $64, %rsi
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testq %rbx, %rbx
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    subq $-128, %rsi
+; X64-NEXT:    bsrq %r9, %rbx
+; X64-NEXT:    xorq $63, %rbx
+; X64-NEXT:    addq $64, %rbx
+; X64-NEXT:    testq %r9, %r9
+; X64-NEXT:    cmoveq %rsi, %rbx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    subq $127, %rbx
+; X64-NEXT:    movl $0, %r13d
+; X64-NEXT:    sbbq %r13, %r13
+; X64-NEXT:    movl $0, %r15d
+; X64-NEXT:    sbbq %r15, %r15
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    bsrq %r12, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    bsrq %r10, %rbp
+; X64-NEXT:    xorq $63, %rbp
+; X64-NEXT:    addq $64, %rbp
+; X64-NEXT:    testq %r12, %r12
+; X64-NEXT:    cmovneq %rdi, %rbp
+; X64-NEXT:    subq $-128, %rbp
+; X64-NEXT:    bsrq %r14, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    addq $64, %rdi
+; X64-NEXT:    testq %r14, %r14
+; X64-NEXT:    cmoveq %rbp, %rdi
+; X64-NEXT:    subq $127, %rdi
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    sbbq %rdx, %rdx
+; X64-NEXT:    subq %rdi, %rbx
+; X64-NEXT:    sbbq %rbp, %r13
+; X64-NEXT:    sbbq %rcx, %r15
+; X64-NEXT:    sbbq %rdx, %rsi
+; X64-NEXT:    movl $128, %ecx
+; X64-NEXT:    cmpq %rbx, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r13, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r15, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rsi, %rcx
+; X64-NEXT:    setb %cl
+; X64-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Folded Reload
+; X64-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rax, %r8
+; X64-NEXT:    andl $1, %r8d
+; X64-NEXT:    testb %cl, %cl
+; X64-NEXT:    movq %r12, %rdx
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    cmoveq %r10, %rax
+; X64-NEXT:    jne .LBB2_8
+; X64-NEXT:  # %bb.1: # %_udiv-special-cases
+; X64-NEXT:    andl $1, %r15d
+; X64-NEXT:    movq %rbx, %rcx
+; X64-NEXT:    xorq $128, %rcx
+; X64-NEXT:    orq %r15, %rcx
+; X64-NEXT:    orq %r13, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    je .LBB2_9
+; X64-NEXT:  # %bb.2: # %udiv-bb1
+; X64-NEXT:    movq %r10, %rdi
+; X64-NEXT:    movq %r12, %rsi
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %ebx, %edx
+; X64-NEXT:    movb $-128, %r8b
+; X64-NEXT:    subb %bl, %r8b
+; X64-NEXT:    movq %r14, %r10
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shlq %cl, %r10
+; X64-NEXT:    movq %rdi, %r9
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    xorl %r12d, %r12d
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    cmovneq %r12, %rax
+; X64-NEXT:    cmovneq %r12, %r10
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    shldq %cl, %r9, %rsi
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %rbp, %rsi
+; X64-NEXT:    addq $1, %rbx
+; X64-NEXT:    adcq $0, %r13
+; X64-NEXT:    adcq $0, %r15
+; X64-NEXT:    andl $1, %r15d
+; X64-NEXT:    movq %r9, %rbp
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    shrdq %cl, %rdi, %rbp
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    cmoveq %rbp, %rdi
+; X64-NEXT:    orl %edi, %r10d
+; X64-NEXT:    negb %dl
+; X64-NEXT:    movq %r9, %rdi
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    cmovneq %r12, %rdi
+; X64-NEXT:    testb %r8b, %r8b
+; X64-NEXT:    cmovsq %r12, %rsi
+; X64-NEXT:    cmovsq %rdi, %r10
+; X64-NEXT:    cmoveq %r14, %r10
+; X64-NEXT:    cmovsq %r12, %rax
+; X64-NEXT:    movq %rbx, %rcx
+; X64-NEXT:    orq %r15, %rcx
+; X64-NEXT:    orq %r13, %rcx
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    je .LBB2_6
+; X64-NEXT:  # %bb.3: # %udiv-preheader
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    movq %r9, %rdx
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r12, %rdx
+; X64-NEXT:    movq %r12, %rdi
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    testb $64, %bl
+; X64-NEXT:    cmovneq %rdi, %rdx
+; X64-NEXT:    cmovneq %r8, %rdi
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %bl, %cl
+; X64-NEXT:    xorl %r11d, %r11d
+; X64-NEXT:    shldq %cl, %r14, %r11
+; X64-NEXT:    movq %r14, %rbp
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %r11
+; X64-NEXT:    cmovneq %r8, %rbp
+; X64-NEXT:    orq %rdx, %rbp
+; X64-NEXT:    orq %rdi, %r11
+; X64-NEXT:    leal -128(%rbx), %ecx
+; X64-NEXT:    shrdq %cl, %r8, %r14
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r8, %r14
+; X64-NEXT:    testb %bl, %bl
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovsq %r8, %r11
+; X64-NEXT:    cmoveq %r12, %r11
+; X64-NEXT:    cmovnsq %rbp, %r14
+; X64-NEXT:    cmoveq %r9, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    addq $-1, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    adcq $1, %rdx
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    xorl %r12d, %r12d
+; X64-NEXT:    xorl %r9d, %r9d
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB2_4: # %udiv-do-while
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    shldq $1, %r14, %rdi
+; X64-NEXT:    shrq $63, %r11
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    leaq (%r10,%r14,2), %r14
+; X64-NEXT:    shldq $1, %rax, %rcx
+; X64-NEXT:    orq %r9, %rcx
+; X64-NEXT:    shrq $63, %rsi
+; X64-NEXT:    addq %rax, %rax
+; X64-NEXT:    orq %r12, %rax
+; X64-NEXT:    orl %esi, %ebp
+; X64-NEXT:    movl %ebp, %r10d
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    cmpq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, %rsi
+; X64-NEXT:    sbbq %rdi, %rsi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    sbbq %r11, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    negq %rsi
+; X64-NEXT:    movl %esi, %r12d
+; X64-NEXT:    andl $1, %r12d
+; X64-NEXT:    movq %rsi, %rbp
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    subq %rsi, %r14
+; X64-NEXT:    sbbq %rbp, %rdi
+; X64-NEXT:    addq $-1, %rbx
+; X64-NEXT:    adcq $-1, %r13
+; X64-NEXT:    adcq $1, %r15
+; X64-NEXT:    andl $1, %r15d
+; X64-NEXT:    movq %rbx, %rsi
+; X64-NEXT:    orq %r15, %rsi
+; X64-NEXT:    orq %r13, %rsi
+; X64-NEXT:    movl $0, %r9d
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    movq %rdi, %r11
+; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    jne .LBB2_4
+; X64-NEXT:    jmp .LBB2_7
+; X64-NEXT:  .LBB2_6:
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:  .LBB2_7: # %udiv-loop-exit
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    orq %r8, %rdx
+; X64-NEXT:    shrq $63, %rcx
+; X64-NEXT:    addq %rax, %rax
+; X64-NEXT:    orq %r12, %rax
+; X64-NEXT:    orl %ecx, %r8d
+; X64-NEXT:    andl $1, %r8d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:  .LBB2_8: # %udiv-end
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:  .LBB2_9: # %udiv-end
+; X64-NEXT:    xorq %r11, %r8
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    xorq %rcx, %rax
+; X64-NEXT:    subq %rcx, %rax
+; X64-NEXT:    sbbq %rcx, %rdx
+; X64-NEXT:    sbbq %r11, %r8
+; X64-NEXT:    andl $1, %r8d
+; X64-NEXT:    movq %r8, %rcx
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %res = sdiv i129 %a, %b
+  ret i129 %res
+}
+
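+; The signed remainder at the same width reuses the shift-subtract divide
+; loop, then multiplies the quotient back and subtracts it (the mul/imul
+; sequence in the checks below) before restoring the dividend's sign.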
+define i129 @srem129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: srem129:
+; X86:       # %bb.0: # %_udiv-special-cases
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $136, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negl %eax
+; X86-NEXT:    xorl %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %ecx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    xorl %ecx, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %ecx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl %ecx, %esi
+; X86-NEXT:    subl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    subl %ecx, %esi
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    sbbl %ecx, %ebx
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ecx, %ebp
+; X86-NEXT:    andl $1, %ebp
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    sete (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB3_1
+; X86-NEXT:  # %bb.2: # %_udiv-special-cases
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    bsrl %ebx, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    jmp .LBB3_3
+; X86-NEXT:  .LBB3_1:
+; X86-NEXT:    bsrl %edi, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:  .LBB3_3: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb (%esp), %dl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    jne .LBB3_4
+; X86-NEXT:  # %bb.5: # %_udiv-special-cases
+; X86-NEXT:    bsrl %eax, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    jmp .LBB3_6
+; X86-NEXT:  .LBB3_4:
+; X86-NEXT:    bsrl %esi, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:  .LBB3_6: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    jne .LBB3_8
+; X86-NEXT:  # %bb.7: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB3_8: # %_udiv-special-cases
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB3_9
+; X86-NEXT:  # %bb.10: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %ebx
+; X86-NEXT:    jmp .LBB3_11
+; X86-NEXT:  .LBB3_9:
+; X86-NEXT:    bsrl %ecx, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $96, %ebx
+; X86-NEXT:  .LBB3_11: # %_udiv-special-cases
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    subl $127, %ebx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    jne .LBB3_12
+; X86-NEXT:  # %bb.13: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB3_16
+; X86-NEXT:  .LBB3_15:
+; X86-NEXT:    bsrl %eax, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    jmp .LBB3_17
+; X86-NEXT:  .LBB3_12:
+; X86-NEXT:    bsrl %ecx, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB3_15
+; X86-NEXT:  .LBB3_16: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:  .LBB3_17: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    jne .LBB3_19
+; X86-NEXT:  # %bb.18: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebp
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB3_19: # %_udiv-special-cases
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %dl, (%esp) # 1-byte Spill
+; X86-NEXT:    jne .LBB3_20
+; X86-NEXT:  # %bb.21: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %ecx
+; X86-NEXT:    jmp .LBB3_22
+; X86-NEXT:  .LBB3_20:
+; X86-NEXT:    bsrl %eax, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    addl $96, %ecx
+; X86-NEXT:  .LBB3_22: # %_udiv-special-cases
+; X86-NEXT:    subl $127, %ecx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    sbbl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %ebx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl $128, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl %edx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    setb %al
+; X86-NEXT:    movb (%esp), %cl # 1-byte Reload
+; X86-NEXT:    orb %al, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB3_24
+; X86-NEXT:  # %bb.23: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB3_24: # %_udiv-special-cases
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB3_26
+; X86-NEXT:  # %bb.25: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB3_26: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB3_34
+; X86-NEXT:  # %bb.27: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    xorl $128, %ecx
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    je .LBB3_34
+; X86-NEXT:  # %bb.28: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    # kill: def $bl killed $bl killed $ebx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB3_30
+; X86-NEXT:  # %bb.29: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB3_30: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $-128, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movb %bl, %ch
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, (%esp) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB3_31
+; X86-NEXT:  # %bb.100: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB3_101
+; X86-NEXT:  .LBB3_31:
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB3_101: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb (%esp), %ch # 1-byte Reload
+; X86-NEXT:    subb $64, %ch
+; X86-NEXT:    jb .LBB3_103
+; X86-NEXT:  # %bb.102: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB3_103: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB3_105
+; X86-NEXT:  # %bb.104: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB3_105: # %udiv-bb1
+; X86-NEXT:    negb %ch
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB3_107
+; X86-NEXT:  # %bb.106: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB3_107: # %udiv-bb1
+; X86-NEXT:    cmpb $64, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB3_109
+; X86-NEXT:  # %bb.108:
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB3_109: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB3_111
+; X86-NEXT:  # %bb.110: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB3_111: # %udiv-bb1
+; X86-NEXT:    cmpb $64, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB3_113
+; X86-NEXT:  # %bb.112:
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB3_113: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB3_115
+; X86-NEXT:  # %bb.114: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB3_115: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edi
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB3_117
+; X86-NEXT:  # %bb.116: # %udiv-bb1
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB3_117: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jae .LBB3_119
+; X86-NEXT:  # %bb.118:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB3_119: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    je .LBB3_121
+; X86-NEXT:  # %bb.120: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB3_121: # %udiv-bb1
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB3_123
+; X86-NEXT:  # %bb.122: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB3_123: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB3_125
+; X86-NEXT:  # %bb.124: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB3_125: # %udiv-bb1
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    orl %edx, %ebx
+; X86-NEXT:    cmpb $0, (%esp) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jns .LBB3_127
+; X86-NEXT:  # %bb.126: # %udiv-bb1
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:  .LBB3_127: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB3_129
+; X86-NEXT:  # %bb.128: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB3_129: # %udiv-bb1
+; X86-NEXT:    jns .LBB3_131
+; X86-NEXT:  # %bb.130: # %udiv-bb1
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:  .LBB3_131: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    je .LBB3_133
+; X86-NEXT:  # %bb.132: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB3_133: # %udiv-bb1
+; X86-NEXT:    jns .LBB3_135
+; X86-NEXT:  # %bb.134: # %udiv-bb1
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB3_135: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    je .LBB3_137
+; X86-NEXT:  # %bb.136: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB3_137: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jns .LBB3_139
+; X86-NEXT:  # %bb.138: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB3_139: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    je .LBB3_140
+; X86-NEXT:  # %bb.35: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB3_36
+; X86-NEXT:  # %bb.37: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB3_38
+; X86-NEXT:  .LBB3_140:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jmp .LBB3_33
+; X86-NEXT:  .LBB3_36:
+; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB3_38: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    subb $64, %al
+; X86-NEXT:    jb .LBB3_40
+; X86-NEXT:  # %bb.39: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB3_40: # %udiv-preheader
+; X86-NEXT:    negb %al
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    jne .LBB3_42
+; X86-NEXT:  # %bb.41: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB3_42: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb %bl, %ah
+; X86-NEXT:    addb $-64, %ah
+; X86-NEXT:    movb %ah, %cl
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %ah
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB3_44
+; X86-NEXT:  # %bb.43: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB3_44: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %bl
+; X86-NEXT:    jb .LBB3_45
+; X86-NEXT:  # %bb.46: # %udiv-preheader
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB3_47
+; X86-NEXT:  .LBB3_45:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:  .LBB3_47: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB3_49
+; X86-NEXT:  # %bb.48: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB3_49: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB3_51
+; X86-NEXT:  # %bb.50: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB3_51: # %udiv-preheader
+; X86-NEXT:    movb %ah, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %ah
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB3_53
+; X86-NEXT:  # %bb.52: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB3_53: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpb $64, %bl
+; X86-NEXT:    jae .LBB3_55
+; X86-NEXT:  # %bb.54:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB3_55: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB3_57
+; X86-NEXT:  # %bb.56: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:  .LBB3_57: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shldl %cl, %edx, %edi
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    jne .LBB3_59
+; X86-NEXT:  # %bb.58: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB3_59: # %udiv-preheader
+; X86-NEXT:    movb $-128, %ch
+; X86-NEXT:    subb %bl, %ch
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jb .LBB3_61
+; X86-NEXT:  # %bb.60: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB3_61: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    je .LBB3_63
+; X86-NEXT:  # %bb.62:
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB3_63: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    jb .LBB3_65
+; X86-NEXT:  # %bb.64: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB3_65: # %udiv-preheader
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB3_67
+; X86-NEXT:  # %bb.66: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB3_67: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    jb .LBB3_69
+; X86-NEXT:  # %bb.68: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB3_69: # %udiv-preheader
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB3_71
+; X86-NEXT:  # %bb.70: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB3_71: # %udiv-preheader
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    je .LBB3_73
+; X86-NEXT:  # %bb.72: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB3_73: # %udiv-preheader
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testb %bl, %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB3_75
+; X86-NEXT:  # %bb.74: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB3_75: # %udiv-preheader
+; X86-NEXT:    jns .LBB3_77
+; X86-NEXT:  # %bb.76: # %udiv-preheader
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:  .LBB3_77: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB3_79
+; X86-NEXT:  # %bb.78: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB3_79: # %udiv-preheader
+; X86-NEXT:    movb %bl, %cl
+; X86-NEXT:    addb $-128, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    shrdl %cl, %ebx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB3_81
+; X86-NEXT:  # %bb.80: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB3_81: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jb .LBB3_83
+; X86-NEXT:  # %bb.82: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB3_83: # %udiv-preheader
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB3_85
+; X86-NEXT:  # %bb.84: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB3_85: # %udiv-preheader
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testb %bl, %bl
+; X86-NEXT:    jns .LBB3_87
+; X86-NEXT:  # %bb.86: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB3_87: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB3_89
+; X86-NEXT:  # %bb.88: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB3_89: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB3_91
+; X86-NEXT:  # %bb.90: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB3_91: # %udiv-preheader
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    testb %bl, %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jns .LBB3_93
+; X86-NEXT:  # %bb.92: # %udiv-preheader
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:  .LBB3_93: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB3_95
+; X86-NEXT:  # %bb.94: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB3_95: # %udiv-preheader
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    testb %bl, %bl
+; X86-NEXT:    jns .LBB3_97
+; X86-NEXT:  # %bb.96: # %udiv-preheader
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:  .LBB3_97: # %udiv-preheader
+; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB3_99
+; X86-NEXT:  # %bb.98: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB3_99: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    addl $-1, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $1, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB3_32: # %udiv-do-while
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    shrl $31, %ebx
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    leal (%edi,%ebp,2), %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %ecx, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    cmpl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    subl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebx, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl $-1, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $1, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB3_32
+; X86-NEXT:  .LBB3_33: # %udiv-loop-exit
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shldl $1, %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %ecx, %edx
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %ecx
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %esi
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    addl %ebp, %ebp
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB3_34: # %udiv-end
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    adcl %edi, %edx
+; X86-NEXT:    imull %esi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    addl %esi, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    adcl %ebx, %edx
+; X86-NEXT:    imull %edi, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl %ebp, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    addl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    xorl %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    xorl %ecx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    xorl %ecx, %edi
+; X86-NEXT:    xorl %ecx, %edx
+; X86-NEXT:    xorl %ecx, %esi
+; X86-NEXT:    subl %ecx, %esi
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    sbbl %ecx, %edi
+; X86-NEXT:    sbbl %ecx, %ebp
+; X86-NEXT:    sbbl %eax, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %esi, (%eax)
+; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    movl %edi, 8(%eax)
+; X86-NEXT:    movl %ebp, 12(%eax)
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movb %bl, 16(%eax)
+; X86-NEXT:    addl $136, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: srem129:
+; X64:       # %bb.0: # %_udiv-special-cases
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %r9, %rbx
+; X64-NEXT:    movq %r8, %rbp
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rsi, %r13
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    movl %r14d, %r15d
+; X64-NEXT:    andl $1, %r15d
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    negq %r15
+; X64-NEXT:    xorq %r15, %r14
+; X64-NEXT:    xorq %r15, %r13
+; X64-NEXT:    xorq %r15, %rdi
+; X64-NEXT:    xorq %rax, %rbx
+; X64-NEXT:    xorq %rax, %rbp
+; X64-NEXT:    xorq %rax, %rcx
+; X64-NEXT:    subq %r15, %rdi
+; X64-NEXT:    sbbq %r15, %r13
+; X64-NEXT:    sbbq %r15, %r14
+; X64-NEXT:    movl %r14d, %edx
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    sbbq %rax, %rbp
+; X64-NEXT:    sbbq %rax, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    orq %rbp, %rax
+; X64-NEXT:    sete %r8b
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    orq %r13, %rax
+; X64-NEXT:    sete %r9b
+; X64-NEXT:    bsrq %rbp, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    bsrq %rcx, %rsi
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    addq $64, %rsi
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testq %rbp, %rbp
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    subq $-128, %rsi
+; X64-NEXT:    bsrq %rbx, %r11
+; X64-NEXT:    xorq $63, %r11
+; X64-NEXT:    addq $64, %r11
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testq %rbx, %rbx
+; X64-NEXT:    cmoveq %rsi, %r11
+; X64-NEXT:    xorl %r12d, %r12d
+; X64-NEXT:    subq $127, %r11
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    movl $0, %r10d
+; X64-NEXT:    sbbq %r10, %r10
+; X64-NEXT:    bsrq %r13, %rbx
+; X64-NEXT:    xorq $63, %rbx
+; X64-NEXT:    bsrq %rdi, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %r13, %r13
+; X64-NEXT:    cmovneq %rbx, %rcx
+; X64-NEXT:    subq $-128, %rcx
+; X64-NEXT:    bsrq %rdx, %rbx
+; X64-NEXT:    xorq $63, %rbx
+; X64-NEXT:    addq $64, %rbx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testq %rdx, %rdx
+; X64-NEXT:    cmoveq %rcx, %rbx
+; X64-NEXT:    subq $127, %rbx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    sbbq %rdx, %rdx
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    subq %rbx, %r11
+; X64-NEXT:    sbbq %rcx, %rax
+; X64-NEXT:    sbbq %rdx, %rsi
+; X64-NEXT:    sbbq %rbp, %r10
+; X64-NEXT:    movl $128, %ecx
+; X64-NEXT:    cmpq %r11, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rax, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rsi, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r10, %rcx
+; X64-NEXT:    setb %cl
+; X64-NEXT:    orb %r9b, %cl
+; X64-NEXT:    movq %rdi, %r9
+; X64-NEXT:    orb %r8b, %cl
+; X64-NEXT:    cmovneq %r12, %r14
+; X64-NEXT:    andl $1, %r14d
+; X64-NEXT:    testb %cl, %cl
+; X64-NEXT:    movq %r13, %rbx
+; X64-NEXT:    cmovneq %r12, %rbx
+; X64-NEXT:    cmoveq %rdi, %r12
+; X64-NEXT:    jne .LBB3_6
+; X64-NEXT:  # %bb.1: # %_udiv-special-cases
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %r11, %rcx
+; X64-NEXT:    xorq $128, %rcx
+; X64-NEXT:    orq %rsi, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    je .LBB3_6
+; X64-NEXT:  # %bb.2: # %udiv-bb1
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %r11d, %edx
+; X64-NEXT:    movb $-128, %bl
+; X64-NEXT:    subb %r11b, %bl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    shlq %cl, %r14
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    testb $64, %bl
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    cmovneq %r8, %r12
+; X64-NEXT:    cmovneq %r8, %r14
+; X64-NEXT:    movq %r13, %r15
+; X64-NEXT:    shldq %cl, %r9, %r15
+; X64-NEXT:    testb $64, %bl
+; X64-NEXT:    cmovneq %rax, %r15
+; X64-NEXT:    addq $1, %r11
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    shrdq %cl, %r13, %rax
+; X64-NEXT:    movq %r13, %rbp
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    cmoveq %rax, %rbp
+; X64-NEXT:    orl %ebp, %r14d
+; X64-NEXT:    negb %dl
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    cmovneq %r8, %rax
+; X64-NEXT:    testb %bl, %bl
+; X64-NEXT:    cmovsq %r8, %r15
+; X64-NEXT:    cmovsq %rax, %r14
+; X64-NEXT:    cmoveq %rdi, %r14
+; X64-NEXT:    cmovsq %r8, %r12
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r10, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    je .LBB3_7
+; X64-NEXT:  # %bb.3: # %udiv-preheader
+; X64-NEXT:    andl $1, %r14d
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movl %r11d, %ecx
+; X64-NEXT:    shrdq %cl, %r13, %rax
+; X64-NEXT:    movq %r13, %rdx
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    testb $64, %r11b
+; X64-NEXT:    cmovneq %rdx, %rax
+; X64-NEXT:    cmovneq %r8, %rdx
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r11b, %cl
+; X64-NEXT:    movq %r13, %rbx
+; X64-NEXT:    xorl %r13d, %r13d
+; X64-NEXT:    shldq %cl, %rdi, %r13
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %r13
+; X64-NEXT:    cmovneq %r8, %rbp
+; X64-NEXT:    orq %rax, %rbp
+; X64-NEXT:    orq %rdx, %r13
+; X64-NEXT:    leal -128(%r11), %ecx
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    shrdq %cl, %r8, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r8, %rdx
+; X64-NEXT:    testb %r11b, %r11b
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovsq %r8, %r13
+; X64-NEXT:    cmoveq %rbx, %r13
+; X64-NEXT:    cmovnsq %rbp, %rdx
+; X64-NEXT:    cmoveq %r9, %rdx
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    addq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $1, %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    xorl %r10d, %r10d
+; X64-NEXT:    xorl %ebx, %ebx
+; X64-NEXT:    movq %r13, %r9
+; X64-NEXT:    movq %r15, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB3_4: # %udiv-do-while
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    shldq $1, %rdx, %r9
+; X64-NEXT:    shrq $63, %r13
+; X64-NEXT:    andl $1, %r14d
+; X64-NEXT:    leaq (%r14,%rdx,2), %rdx
+; X64-NEXT:    shldq $1, %r12, %rcx
+; X64-NEXT:    orq %r10, %rcx
+; X64-NEXT:    shrq $63, %r15
+; X64-NEXT:    addq %r12, %r12
+; X64-NEXT:    orq %r8, %r12
+; X64-NEXT:    orl %r15d, %ebx
+; X64-NEXT:    movl %ebx, %r14d
+; X64-NEXT:    andl $1, %r14d
+; X64-NEXT:    cmpq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    sbbq %r9, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    sbbq %r13, %rbp
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    negq %rbp
+; X64-NEXT:    movl %ebp, %r8d
+; X64-NEXT:    andl $1, %r8d
+; X64-NEXT:    movq %rbp, %rbx
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    subq %rbp, %rdx
+; X64-NEXT:    sbbq %rbx, %r9
+; X64-NEXT:    addq $-1, %r11
+; X64-NEXT:    adcq $-1, %rdi
+; X64-NEXT:    adcq $1, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    movl $0, %r10d
+; X64-NEXT:    movl $0, %ebx
+; X64-NEXT:    movq %r9, %r13
+; X64-NEXT:    movq %rcx, %r15
+; X64-NEXT:    jne .LBB3_4
+; X64-NEXT:    jmp .LBB3_5
+; X64-NEXT:  .LBB3_7:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r15, %rcx
+; X64-NEXT:  .LBB3_5: # %udiv-loop-exit
+; X64-NEXT:    movq %rcx, %rbx
+; X64-NEXT:    shldq $1, %r12, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    orq %r14, %rbx
+; X64-NEXT:    shrq $63, %rcx
+; X64-NEXT:    addq %r12, %r12
+; X64-NEXT:    orq %r8, %r12
+; X64-NEXT:    orl %ecx, %r14d
+; X64-NEXT:    andl $1, %r14d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:  .LBB3_6: # %udiv-end
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %r12
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %r12
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %r10, %rsi
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    adcq %rdi, %rdx
+; X64-NEXT:    imulq %rbx, %rcx
+; X64-NEXT:    addq %rdx, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    imulq %r12, %rdx
+; X64-NEXT:    imulq %rbp, %r14
+; X64-NEXT:    addq %rdx, %r14
+; X64-NEXT:    addq %rcx, %r14
+; X64-NEXT:    subq %r8, %r9
+; X64-NEXT:    sbbq %rax, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    sbbq %r14, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    xorq %rax, %rcx
+; X64-NEXT:    xorq %r15, %r13
+; X64-NEXT:    xorq %r15, %r9
+; X64-NEXT:    subq %r15, %r9
+; X64-NEXT:    sbbq %r15, %r13
+; X64-NEXT:    sbbq %rax, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %r13, %rdx
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %res = srem i129 %a, %b
+  ret i129 %res
+}
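+; Note on the checks above: the i129 srem is expanded by the generic
+; shift-subtract long-division lowering (the _udiv-special-cases, udiv-bb1,
+; udiv-do-while, udiv-loop-exit and udiv-end blocks visible in the labels).
+; Reading the generated code, the udiv-end block recovers the remainder via
+; the usual mul/sub sequence (rem = a - (a / b) * b), followed by the
+; xor/sub sign fixup required for srem.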
+
+; Also check some larger bit widths.
+define i257 @sdiv257(i257 %a, i257 %b) nounwind {
+; X86-LABEL: sdiv257:
+; X86:       # %bb.0: # %_udiv-special-cases
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $240, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    negl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    xorl %ecx, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    xorl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    xorl %ecx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    xorl %ecx, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %ecx, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ecx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ecx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %esi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    xorl %esi, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    xorl %esi, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    xorl %esi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %esi, %ecx
+; X86-NEXT:    subl %esi, %ecx
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    sbbl %esi, %ebp
+; X86-NEXT:    sbbl %esi, %edi
+; X86-NEXT:    sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    sete %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    sete %al
+; X86-NEXT:    testl %ebp, %ebp
+; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB4_1
+; X86-NEXT:  # %bb.2: # %_udiv-special-cases
+; X86-NEXT:    bsrl %esi, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB4_3
+; X86-NEXT:  .LBB4_1:
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    bsrl %ebp, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB4_3: # %_udiv-special-cases
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    jne .LBB4_4
+; X86-NEXT:  # %bb.5: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ebp, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB4_6
+; X86-NEXT:  .LBB4_4:
+; X86-NEXT:    bsrl %edi, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB4_6: # %_udiv-special-cases
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB4_8
+; X86-NEXT:  # %bb.7: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_8: # %_udiv-special-cases
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB4_9
+; X86-NEXT:  # %bb.10: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ebx, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je .LBB4_13
+; X86-NEXT:  .LBB4_12:
+; X86-NEXT:    bsrl %edx, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    jmp .LBB4_14
+; X86-NEXT:  .LBB4_9:
+; X86-NEXT:    bsrl %esi, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    jne .LBB4_12
+; X86-NEXT:  .LBB4_13: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:  .LBB4_14: # %_udiv-special-cases
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB4_16
+; X86-NEXT:  # %bb.15: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_16: # %_udiv-special-cases
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB4_18
+; X86-NEXT:  # %bb.17: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB4_18: # %_udiv-special-cases
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    addl $256, %ebx # imm = 0x100
+; X86-NEXT:    setb %al
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB4_19
+; X86-NEXT:  # %bb.20: # %_udiv-special-cases
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movb %al, %dl
+; X86-NEXT:    jmp .LBB4_21
+; X86-NEXT:  .LBB4_19:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    bsrl %esi, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $224, %ebx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB4_21: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    subl $255, %ebx
+; X86-NEXT:    sbbl $0, %edx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB4_22
+; X86-NEXT:  # %bb.23: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB4_24
+; X86-NEXT:  .LBB4_22:
+; X86-NEXT:    bsrl %eax, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB4_24: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB4_25
+; X86-NEXT:  # %bb.26: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    jmp .LBB4_27
+; X86-NEXT:  .LBB4_25:
+; X86-NEXT:    bsrl %edx, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:  .LBB4_27: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB4_29
+; X86-NEXT:  # %bb.28: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_29: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB4_30
+; X86-NEXT:  # %bb.31: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    jmp .LBB4_32
+; X86-NEXT:  .LBB4_30:
+; X86-NEXT:    bsrl %edx, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:  .LBB4_32: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB4_33
+; X86-NEXT:  # %bb.34: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    jmp .LBB4_35
+; X86-NEXT:  .LBB4_33:
+; X86-NEXT:    bsrl %ecx, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:  .LBB4_35: # %_udiv-special-cases
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB4_37
+; X86-NEXT:  # %bb.36: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB4_37: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    jne .LBB4_39
+; X86-NEXT:  # %bb.38: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_39: # %_udiv-special-cases
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    addl $256, %eax # imm = 0x100
+; X86-NEXT:    setb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB4_40
+; X86-NEXT:  # %bb.41: # %_udiv-special-cases
+; X86-NEXT:    movb %cl, %dl
+; X86-NEXT:    jmp .LBB4_42
+; X86-NEXT:  .LBB4_40:
+; X86-NEXT:    bsrl %esi, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $224, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB4_42: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    xorl %ecx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    subl $255, %eax
+; X86-NEXT:    sbbl $0, %edx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    subl %eax, %edi
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %ecx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $256, %eax # imm = 0x100
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl %edi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl (%esp), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    setb %al
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; X86-NEXT:    orb %al, %dl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_44
+; X86-NEXT:  # %bb.43: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB4_44: # %_udiv-special-cases
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:    testb %dl, %dl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB4_46
+; X86-NEXT:  # %bb.45: # %_udiv-special-cases
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB4_46: # %_udiv-special-cases
+; X86-NEXT:    jne .LBB4_47
+; X86-NEXT:  # %bb.430: # %_udiv-special-cases
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    xorl $256, %edx # imm = 0x100
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB4_431
+; X86-NEXT:  # %bb.269: # %udiv-bb1
+; X86-NEXT:    addl $-128, %ebx
+; X86-NEXT:    negl %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB4_271
+; X86-NEXT:  # %bb.270: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB4_271: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB4_273
+; X86-NEXT:  # %bb.272: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB4_273: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB4_275
+; X86-NEXT:  # %bb.274: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB4_275: # %udiv-bb1
+; X86-NEXT:    movl $256, %ecx # imm = 0x100
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB4_277
+; X86-NEXT:  # %bb.276: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_277: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB4_279
+; X86-NEXT:  # %bb.278: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB4_279: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB4_281
+; X86-NEXT:  # %bb.280: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB4_281: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB4_283
+; X86-NEXT:  # %bb.282: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_283: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB4_285
+; X86-NEXT:  # %bb.284: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB4_285: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB4_287
+; X86-NEXT:  # %bb.286: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_287: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    jne .LBB4_289
+; X86-NEXT:  # %bb.288: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB4_289: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    jne .LBB4_291
+; X86-NEXT:  # %bb.290: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_291: # %udiv-bb1
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_293
+; X86-NEXT:  # %bb.292: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_293: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    jae .LBB4_294
+; X86-NEXT:  # %bb.295: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB4_296
+; X86-NEXT:  .LBB4_297: # %udiv-bb1
+; X86-NEXT:    subl $128, %eax
+; X86-NEXT:    jb .LBB4_299
+; X86-NEXT:  .LBB4_298: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_299: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB4_301
+; X86-NEXT:  # %bb.300: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_301: # %udiv-bb1
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    jne .LBB4_303
+; X86-NEXT:  # %bb.302: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB4_303: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    jne .LBB4_305
+; X86-NEXT:  # %bb.304: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB4_305: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB4_307
+; X86-NEXT:  # %bb.306: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB4_307: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB4_309
+; X86-NEXT:  # %bb.308:
+; X86-NEXT:    orl %ecx, %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB4_309: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB4_311
+; X86-NEXT:  # %bb.310: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB4_311: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB4_313
+; X86-NEXT:  # %bb.312:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_313: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_315
+; X86-NEXT:  # %bb.314: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB4_315: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB4_317
+; X86-NEXT:  # %bb.316: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB4_317: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB4_319
+; X86-NEXT:  # %bb.318: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_319: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_321
+; X86-NEXT:  # %bb.320: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_321: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB4_323
+; X86-NEXT:  # %bb.322: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_323: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_325
+; X86-NEXT:  # %bb.324: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB4_325: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB4_327
+; X86-NEXT:  # %bb.326:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:  .LBB4_327: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB4_329
+; X86-NEXT:  # %bb.328: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB4_329: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_331
+; X86-NEXT:  # %bb.330: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB4_331: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB4_333
+; X86-NEXT:  # %bb.332: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB4_333: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB4_335
+; X86-NEXT:  # %bb.334:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_335: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB4_337
+; X86-NEXT:  # %bb.336:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:  .LBB4_337: # %udiv-bb1
+; X86-NEXT:    movb $64, %ch
+; X86-NEXT:    subb %bl, %ch
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    jne .LBB4_339
+; X86-NEXT:  # %bb.338: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB4_339: # %udiv-bb1
+; X86-NEXT:    movb %bl, %cl
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB4_341
+; X86-NEXT:  # %bb.340: # %udiv-bb1
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB4_341: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jae .LBB4_343
+; X86-NEXT:  # %bb.342:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_343: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB4_345
+; X86-NEXT:  # %bb.344:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_345: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB4_347
+; X86-NEXT:  # %bb.346: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB4_347: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB4_349
+; X86-NEXT:  # %bb.348:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_349: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %edi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    jne .LBB4_351
+; X86-NEXT:  # %bb.350: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB4_351: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB4_353
+; X86-NEXT:  # %bb.352: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_353: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB4_355
+; X86-NEXT:  # %bb.354: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB4_355: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    je .LBB4_357
+; X86-NEXT:  # %bb.356: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB4_357: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB4_358
+; X86-NEXT:  # %bb.359: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB4_360
+; X86-NEXT:  .LBB4_47:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    jmp .LBB4_431
+; X86-NEXT:  .LBB4_294: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB4_297
+; X86-NEXT:  .LBB4_296: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    subl $128, %eax
+; X86-NEXT:    jae .LBB4_298
+; X86-NEXT:    jmp .LBB4_299
+; X86-NEXT:  .LBB4_358:
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_360: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB4_362
+; X86-NEXT:  # %bb.361: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB4_362: # %udiv-bb1
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB4_364
+; X86-NEXT:  # %bb.363: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB4_364: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_366
+; X86-NEXT:  # %bb.365: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_366: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB4_368
+; X86-NEXT:  # %bb.367: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_368: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB4_370
+; X86-NEXT:  # %bb.369: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB4_370: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB4_372
+; X86-NEXT:  # %bb.371: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB4_372: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB4_374
+; X86-NEXT:  # %bb.373:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_374: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB4_376
+; X86-NEXT:  # %bb.375: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_376: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB4_378
+; X86-NEXT:  # %bb.377: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB4_378: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB4_380
+; X86-NEXT:  # %bb.379: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB4_380: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB4_382
+; X86-NEXT:  # %bb.381:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:  .LBB4_382: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    je .LBB4_384
+; X86-NEXT:  # %bb.383: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_384: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB4_385
+; X86-NEXT:  # %bb.386: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB4_387
+; X86-NEXT:  .LBB4_385:
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_387: # %udiv-bb1
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_389
+; X86-NEXT:  # %bb.388: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_389: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %esi
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_391
+; X86-NEXT:  # %bb.390: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB4_391: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_393
+; X86-NEXT:  # %bb.392: # %udiv-bb1
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB4_393: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB4_395
+; X86-NEXT:  # %bb.394:
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB4_395: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB4_397
+; X86-NEXT:  # %bb.396: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:  .LBB4_397: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    jne .LBB4_399
+; X86-NEXT:  # %bb.398: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB4_399: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB4_401
+; X86-NEXT:  # %bb.400: # %udiv-bb1
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB4_401: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %ebx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_403
+; X86-NEXT:  # %bb.402: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB4_403: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    jae .LBB4_405
+; X86-NEXT:  # %bb.404:
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB4_405: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    je .LBB4_407
+; X86-NEXT:  # %bb.406: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB4_407: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    jae .LBB4_409
+; X86-NEXT:  # %bb.408:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:  .LBB4_409: # %udiv-bb1
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    je .LBB4_411
+; X86-NEXT:  # %bb.410: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB4_411: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB4_413
+; X86-NEXT:  # %bb.412: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB4_413: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB4_415
+; X86-NEXT:  # %bb.414: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB4_415: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB4_417
+; X86-NEXT:  # %bb.416: # %udiv-bb1
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB4_417: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB4_419
+; X86-NEXT:  # %bb.418: # %udiv-bb1
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB4_419: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB4_420
+; X86-NEXT:  # %bb.421: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    jmp .LBB4_422
+; X86-NEXT:  .LBB4_420:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:  .LBB4_422: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edi # 4-byte Reload
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    je .LBB4_424
+; X86-NEXT:  # %bb.423: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB4_424: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB4_426
+; X86-NEXT:  # %bb.425: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_426: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, %edi # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB4_428
+; X86-NEXT:  # %bb.427: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_428: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl $1, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    je .LBB4_429
+; X86-NEXT:  # %bb.50: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB4_52
+; X86-NEXT:  # %bb.51: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB4_52: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB4_54
+; X86-NEXT:  # %bb.53: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB4_54: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB4_56
+; X86-NEXT:  # %bb.55:
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB4_56: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_58
+; X86-NEXT:  # %bb.57: # %udiv-preheader
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:  .LBB4_58: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_60
+; X86-NEXT:  # %bb.59: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_60: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_62
+; X86-NEXT:  # %bb.61: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_62: # %udiv-preheader
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    shrdl %cl, %ebp, %ebx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB4_64
+; X86-NEXT:  # %bb.63:
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB4_64: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB4_65
+; X86-NEXT:  # %bb.66: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB4_67
+; X86-NEXT:  .LBB4_429:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB4_49
+; X86-NEXT:  .LBB4_65:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:  .LBB4_67: # %udiv-preheader
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $128, %edi
+; X86-NEXT:    jb .LBB4_69
+; X86-NEXT:  # %bb.68: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_69: # %udiv-preheader
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB4_71
+; X86-NEXT:  # %bb.70: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB4_71: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    subl $128, %eax
+; X86-NEXT:    jb .LBB4_73
+; X86-NEXT:  # %bb.72: # %udiv-preheader
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB4_73: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %edi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    jne .LBB4_75
+; X86-NEXT:  # %bb.74: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB4_75: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB4_77
+; X86-NEXT:  # %bb.76: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB4_77: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB4_79
+; X86-NEXT:  # %bb.78: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB4_79: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB4_81
+; X86-NEXT:  # %bb.80:
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB4_81: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    je .LBB4_83
+; X86-NEXT:  # %bb.82: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB4_83: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    leal -128(%eax), %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shrdl %cl, %ebx, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_84
+; X86-NEXT:  # %bb.85: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB4_86
+; X86-NEXT:  .LBB4_84:
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_86: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB4_88
+; X86-NEXT:  # %bb.87: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_88: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB4_90
+; X86-NEXT:  # %bb.89:
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB4_90: # %udiv-preheader
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_92
+; X86-NEXT:  # %bb.91: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:  .LBB4_92: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_94
+; X86-NEXT:  # %bb.93: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB4_94: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_96
+; X86-NEXT:  # %bb.95: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB4_96: # %udiv-preheader
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB4_98
+; X86-NEXT:  # %bb.97:
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB4_98: # %udiv-preheader
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB4_100
+; X86-NEXT:  # %bb.99: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB4_100: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB4_102
+; X86-NEXT:  # %bb.101:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_102: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_104
+; X86-NEXT:  # %bb.103: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB4_104: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    jne .LBB4_106
+; X86-NEXT:  # %bb.105: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB4_106: # %udiv-preheader
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jb .LBB4_108
+; X86-NEXT:  # %bb.107: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB4_108: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    jne .LBB4_110
+; X86-NEXT:  # %bb.109: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_110: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    jne .LBB4_112
+; X86-NEXT:  # %bb.111: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB4_112: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB4_114
+; X86-NEXT:  # %bb.113: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_114: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_116
+; X86-NEXT:  # %bb.115: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB4_116: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    shrdl %cl, %ebx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB4_118
+; X86-NEXT:  # %bb.117: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB4_118: # %udiv-preheader
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB4_120
+; X86-NEXT:  # %bb.119: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB4_120: # %udiv-preheader
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    je .LBB4_122
+; X86-NEXT:  # %bb.121: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB4_122: # %udiv-preheader
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    leal -256(%ebx), %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jb .LBB4_124
+; X86-NEXT:  # %bb.123: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB4_124: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB4_126
+; X86-NEXT:  # %bb.125: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_126: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jb .LBB4_127
+; X86-NEXT:  # %bb.128: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB4_129
+; X86-NEXT:  .LBB4_127:
+; X86-NEXT:    orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_129: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jb .LBB4_131
+; X86-NEXT:  # %bb.130: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB4_131: # %udiv-preheader
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je .LBB4_133
+; X86-NEXT:  # %bb.132: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB4_133: # %udiv-preheader
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    je .LBB4_135
+; X86-NEXT:  # %bb.134: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB4_135: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jb .LBB4_136
+; X86-NEXT:  # %bb.137: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB4_138
+; X86-NEXT:  .LBB4_136:
+; X86-NEXT:    orl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_138: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    subl $128, %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jae .LBB4_139
+; X86-NEXT:  # %bb.140: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB4_141
+; X86-NEXT:  .LBB4_142: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jb .LBB4_143
+; X86-NEXT:  .LBB4_144: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB4_145
+; X86-NEXT:  .LBB4_139: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB4_142
+; X86-NEXT:  .LBB4_141: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jae .LBB4_144
+; X86-NEXT:  .LBB4_143:
+; X86-NEXT:    orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_145: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_147
+; X86-NEXT:  # %bb.146: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_147: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB4_149
+; X86-NEXT:  # %bb.148: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_149: # %udiv-preheader
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    je .LBB4_151
+; X86-NEXT:  # %bb.150: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB4_151: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB4_152
+; X86-NEXT:  # %bb.153: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB4_154
+; X86-NEXT:  .LBB4_155: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jb .LBB4_156
+; X86-NEXT:  .LBB4_157: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB4_158
+; X86-NEXT:  .LBB4_152: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB4_155
+; X86-NEXT:  .LBB4_154: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jae .LBB4_157
+; X86-NEXT:  .LBB4_156:
+; X86-NEXT:    orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_158: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    shrdl %cl, %ebx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB4_160
+; X86-NEXT:  # %bb.159: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:  .LBB4_160: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB4_162
+; X86-NEXT:  # %bb.161: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB4_162: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB4_164
+; X86-NEXT:  # %bb.163: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB4_164: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    je .LBB4_166
+; X86-NEXT:  # %bb.165: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB4_166: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB4_168
+; X86-NEXT:  # %bb.167: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB4_168: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB4_170
+; X86-NEXT:  # %bb.169: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB4_170: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %edx # imm = 0x100
+; X86-NEXT:    jb .LBB4_171
+; X86-NEXT:  # %bb.172: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB4_173
+; X86-NEXT:  .LBB4_171:
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_173: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_175
+; X86-NEXT:  # %bb.174: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB4_175: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jb .LBB4_177
+; X86-NEXT:  # %bb.176: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB4_177: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    je .LBB4_179
+; X86-NEXT:  # %bb.178: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB4_179: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB4_181
+; X86-NEXT:  # %bb.180: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_181: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB4_183
+; X86-NEXT:  # %bb.182: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB4_183: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ecx # imm = 0x100
+; X86-NEXT:    jb .LBB4_184
+; X86-NEXT:  # %bb.185: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    jmp .LBB4_186
+; X86-NEXT:  .LBB4_184:
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:  .LBB4_186: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB4_188
+; X86-NEXT:  # %bb.187: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB4_188: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB4_189
+; X86-NEXT:  # %bb.190: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB4_191
+; X86-NEXT:  .LBB4_192: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB4_194
+; X86-NEXT:  .LBB4_193: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB4_194: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_196
+; X86-NEXT:  # %bb.195: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB4_196: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB4_198
+; X86-NEXT:  # %bb.197: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_198: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_200
+; X86-NEXT:  # %bb.199: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB4_200: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB4_202
+; X86-NEXT:  # %bb.201: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB4_202: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_204
+; X86-NEXT:  # %bb.203: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_204: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_206
+; X86-NEXT:  # %bb.205: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB4_206: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_208
+; X86-NEXT:  # %bb.207: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB4_208: # %udiv-preheader
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %ebx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB4_210
+; X86-NEXT:  # %bb.209:
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_210: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB4_212
+; X86-NEXT:  # %bb.211:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_212: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_214
+; X86-NEXT:  # %bb.213: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB4_214: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB4_216
+; X86-NEXT:  # %bb.215:
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB4_216: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB4_218
+; X86-NEXT:  # %bb.217: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB4_218: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB4_220
+; X86-NEXT:  # %bb.219: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB4_220: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB4_222
+; X86-NEXT:  # %bb.221:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB4_222: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB4_224
+; X86-NEXT:  # %bb.223: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:  .LBB4_224: # %udiv-preheader
+; X86-NEXT:    leal -256(%eax), %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB4_226
+; X86-NEXT:  # %bb.225: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_226: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB4_228
+; X86-NEXT:  # %bb.227: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_228: # %udiv-preheader
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB4_230
+; X86-NEXT:  # %bb.229: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB4_230: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB4_232
+; X86-NEXT:  # %bb.231: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB4_232: # %udiv-preheader
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB4_234
+; X86-NEXT:  # %bb.233: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB4_234: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB4_235
+; X86-NEXT:  # %bb.236: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    jmp .LBB4_237
+; X86-NEXT:  .LBB4_189: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB4_192
+; X86-NEXT:  .LBB4_191: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB4_193
+; X86-NEXT:    jmp .LBB4_194
+; X86-NEXT:  .LBB4_235:
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB4_237: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_239
+; X86-NEXT:  # %bb.238: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_239: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_241
+; X86-NEXT:  # %bb.240: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB4_241: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB4_242
+; X86-NEXT:  # %bb.243: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB4_244
+; X86-NEXT:  .LBB4_245: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB4_247
+; X86-NEXT:  .LBB4_246: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB4_247: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB4_249
+; X86-NEXT:  # %bb.248: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_249: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB4_250
+; X86-NEXT:  # %bb.251: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB4_252
+; X86-NEXT:  .LBB4_253: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB4_255
+; X86-NEXT:  .LBB4_254:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_255: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB4_257
+; X86-NEXT:  # %bb.256: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB4_257: # %udiv-preheader
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB4_259
+; X86-NEXT:  # %bb.258: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB4_259: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    jae .LBB4_261
+; X86-NEXT:  # %bb.260:
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB4_261: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB4_263
+; X86-NEXT:  # %bb.262: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB4_263: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jb .LBB4_264
+; X86-NEXT:  # %bb.265: # %udiv-preheader
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    jmp .LBB4_266
+; X86-NEXT:  .LBB4_242: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB4_245
+; X86-NEXT:  .LBB4_244: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB4_246
+; X86-NEXT:    jmp .LBB4_247
+; X86-NEXT:  .LBB4_250: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB4_253
+; X86-NEXT:  .LBB4_252:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB4_254
+; X86-NEXT:    jmp .LBB4_255
+; X86-NEXT:  .LBB4_264:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:  .LBB4_266: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB4_268
+; X86-NEXT:  # %bb.267: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB4_268: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    addl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $1, %ebx
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB4_48: # %udiv-do-while
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    andl $1, %ebp
+; X86-NEXT:    leal (%ebp,%ecx,2), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %ecx
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %ecx
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %edi, %ecx
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %edi
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %edx
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    addl %ecx, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    negl %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    subl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    addl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $1, %ecx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB4_48
+; X86-NEXT:  .LBB4_49: # %udiv-loop-exit
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edx
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %esi
+; X86-NEXT:    orl %ebp, %esi
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %edi
+; X86-NEXT:    orl %ebp, %edi
+; X86-NEXT:    shrl $31, %ecx
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB4_431: # %udiv-end
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    xorl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    xorl %eax, %edx
+; X86-NEXT:    xorl %eax, %esi
+; X86-NEXT:    xorl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %eax, %ebx
+; X86-NEXT:    xorl %eax, %edi
+; X86-NEXT:    xorl %eax, %ebp
+; X86-NEXT:    subl %eax, %ebp
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    sbbl %eax, %ebx
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, %esi
+; X86-NEXT:    sbbl %eax, %edx
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %ebp, (%eax)
+; X86-NEXT:    movl %edi, 4(%eax)
+; X86-NEXT:    movl %ebx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %esi, 16(%eax)
+; X86-NEXT:    movl %edx, 20(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 24(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 28(%eax)
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movb %cl, 32(%eax)
+; X86-NEXT:    addl $240, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: sdiv257:
+; X64:       # %bb.0: # %_udiv-special-cases
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    subq $88, %rsp
+; X64-NEXT:    movq %r9, %r10
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; X64-NEXT:    movl %r11d, %r13d
+; X64-NEXT:    andl $1, %r13d
+; X64-NEXT:    negq %r13
+; X64-NEXT:    movl %r10d, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    xorq %rax, %r10
+; X64-NEXT:    xorq %rax, %r8
+; X64-NEXT:    xorq %rax, %rcx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    xorq %rax, %rsi
+; X64-NEXT:    subq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, %rcx
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, %r8
+; X64-NEXT:    sbbq %rax, %r10
+; X64-NEXT:    movl %r10d, %r12d
+; X64-NEXT:    andl $1, %r12d
+; X64-NEXT:    xorq %r13, %r11
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
+; X64-NEXT:    xorq %r13, %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14
+; X64-NEXT:    xorq %r13, %r14
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; X64-NEXT:    xorq %r13, %rbp
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %r13, %rdi
+; X64-NEXT:    subq %r13, %rdi
+; X64-NEXT:    sbbq %r13, %rbp
+; X64-NEXT:    sbbq %r13, %r14
+; X64-NEXT:    sbbq %r13, %rbx
+; X64-NEXT:    sbbq %r13, %r11
+; X64-NEXT:    andl $1, %r11d
+; X64-NEXT:    movq %r13, %rcx
+; X64-NEXT:    xorq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorl %eax, %r13d
+; X64-NEXT:    andl $1, %r13d
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    movq %r11, %rcx
+; X64-NEXT:    orq %r14, %rcx
+; X64-NEXT:    orq %rdi, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    sete {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    orq %r8, %rcx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r12, %rdx
+; X64-NEXT:    orq %r9, %rdx
+; X64-NEXT:    orq %rsi, %rdx
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    sete {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    bsrq %rbx, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    bsrq %r14, %rsi
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    addq $64, %rsi
+; X64-NEXT:    testq %rbx, %rbx
+; X64-NEXT:    cmovneq %rdx, %rsi
+; X64-NEXT:    bsrq %rbp, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    movq %rdi, (%rsp) # 8-byte Spill
+; X64-NEXT:    bsrq %rdi, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    addq $64, %rdi
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testq %rbp, %rbp
+; X64-NEXT:    cmovneq %rdx, %rdi
+; X64-NEXT:    subq $-128, %rdi
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rbx, %rdx
+; X64-NEXT:    cmovneq %rsi, %rdi
+; X64-NEXT:    addq $256, %rdi # imm = 0x100
+; X64-NEXT:    bsrq %r11, %r8
+; X64-NEXT:    xorq $63, %r8
+; X64-NEXT:    addq $192, %r8
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testq %r11, %r11
+; X64-NEXT:    cmoveq %rdi, %r8
+; X64-NEXT:    subq $255, %r8
+; X64-NEXT:    movl $0, %r15d
+; X64-NEXT:    sbbq %r15, %r15
+; X64-NEXT:    movl $0, %r11d
+; X64-NEXT:    sbbq %r11, %r11
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %r9d
+; X64-NEXT:    sbbq %r9, %r9
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %r14d
+; X64-NEXT:    sbbq %r14, %r14
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    bsrq %rax, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    bsrq %rdx, %rbx
+; X64-NEXT:    xorq $63, %rbx
+; X64-NEXT:    addq $64, %rbx
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rdi, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    bsrq %rcx, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    bsrq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    addq $64, %rax
+; X64-NEXT:    testq %rcx, %rcx
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    subq $-128, %rax
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    cmovneq %rbx, %rax
+; X64-NEXT:    addq $256, %rax # imm = 0x100
+; X64-NEXT:    bsrq %r12, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    addq $192, %rdi
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testq %r12, %r12
+; X64-NEXT:    cmoveq %rax, %rdi
+; X64-NEXT:    subq $255, %rdi
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movl $0, %ebx
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    sbbq %rdx, %rdx
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    movl $0, %r12d
+; X64-NEXT:    sbbq %r12, %r12
+; X64-NEXT:    subq %rdi, %r8
+; X64-NEXT:    sbbq %rax, %r15
+; X64-NEXT:    sbbq %rbx, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    sbbq %rcx, %rbx
+; X64-NEXT:    sbbq %rdx, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    sbbq %rsi, %rdi
+; X64-NEXT:    sbbq %rbp, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    sbbq %r12, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movl $256, %eax # imm = 0x100
+; X64-NEXT:    cmpq %r8, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r15, %rcx
+; X64-NEXT:    sbbq %r15, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r11, %r15
+; X64-NEXT:    sbbq %r11, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rbx, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %r9, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rdi, %rax
+; X64-NEXT:    movl $0, %edi
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %r14, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rdx, %rax
+; X64-NEXT:    setb %al
+; X64-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
+; X64-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rdi, %r10
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmovneq %rdi, %rdx
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    cmovneq %rdi, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmovneq %rdi, %rbp
+; X64-NEXT:    cmoveq %r12, %rdi
+; X64-NEXT:    jne .LBB4_6
+; X64-NEXT:  # %bb.1: # %_udiv-special-cases
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    movq %rcx, %r14
+; X64-NEXT:    orq %rbx, %rcx
+; X64-NEXT:    movq %r8, %rsi
+; X64-NEXT:    xorq $256, %rsi # imm = 0x100
+; X64-NEXT:    orq %r9, %rsi
+; X64-NEXT:    orq %r15, %rsi
+; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    je .LBB4_6
+; X64-NEXT:  # %bb.2: # %udiv-bb1
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $256, %esi # imm = 0x100
+; X64-NEXT:    subl %r8d, %esi
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %bl
+; X64-NEXT:    subb %sil, %bl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    testb $64, %bl
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    cmovneq %rbp, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %r12, %rdi
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    movq %rdi, %r13
+; X64-NEXT:    cmovneq %rbp, %r13
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    cmovneq %rbp, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r12, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    shlq %cl, %r12
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    cmovneq %rbp, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq %r11, %r15
+; X64-NEXT:    movq %r10, %r9
+; X64-NEXT:    shldq %cl, %r10, %r15
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    cmovneq %rax, %r15
+; X64-NEXT:    movq %r14, %r10
+; X64-NEXT:    shldq %cl, %r8, %r10
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    cmovneq %rdi, %r10
+; X64-NEXT:    movq %r8, %rdi
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    shrdq %cl, %r14, %rdi
+; X64-NEXT:    testb $64, %bl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    cmovneq %rdx, %rdi
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    shrdq %cl, %r14, %rax
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    testb $64, %bl
+; X64-NEXT:    cmoveq %rax, %rdx
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %bl, %cl
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %rax
+; X64-NEXT:    orl %edx, %eax
+; X64-NEXT:    leal -128(%rbx), %ecx
+; X64-NEXT:    movq %r9, %rdx
+; X64-NEXT:    shrdq %cl, %r11, %rdx
+; X64-NEXT:    movq %r11, %rbp
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmoveq %rdx, %rbp
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    cmovbl %eax, %ebp
+; X64-NEXT:    testl %ebx, %ebx
+; X64-NEXT:    movl %r8d, %eax
+; X64-NEXT:    cmovnel %ebp, %eax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    xorl %r9d, %r9d
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    leal -128(%rsi), %ecx
+; X64-NEXT:    movq %r14, %rbp
+; X64-NEXT:    shldq %cl, %r8, %rbp
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdx, %rbp
+; X64-NEXT:    cmovneq %r9, %rdx
+; X64-NEXT:    cmpl $128, %esi
+; X64-NEXT:    cmovbq %r15, %rbp
+; X64-NEXT:    cmovbq %rdi, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmovaeq %r9, %r10
+; X64-NEXT:    cmovaeq %r9, %r13
+; X64-NEXT:    cmovael %r9d, %r12d
+; X64-NEXT:    orl %eax, %r12d
+; X64-NEXT:    cmpl $256, %esi # imm = 0x100
+; X64-NEXT:    cmovaeq %r9, %r13
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    shlq %cl, %r8
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r9, %r8
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    cmovaeq %r9, %r8
+; X64-NEXT:    cmpl $256, %esi # imm = 0x100
+; X64-NEXT:    cmovaeq %r9, %r10
+; X64-NEXT:    cmovaeq %r8, %r12
+; X64-NEXT:    testl %esi, %esi
+; X64-NEXT:    cmoveq %r11, %rbp
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %esi # imm = 0x100
+; X64-NEXT:    cmovaeq %r9, %rbp
+; X64-NEXT:    cmovaeq %r9, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testl %esi, %esi
+; X64-NEXT:    cmoveq %rdi, %r12
+; X64-NEXT:    addq $1, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    adcq $0, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    orq %r9, %rcx
+; X64-NEXT:    orq %rbx, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    je .LBB4_7
+; X64-NEXT:  # %bb.3: # %udiv-preheader
+; X64-NEXT:    movq %rdi, %r13
+; X64-NEXT:    andl $1, %r12d
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r11, %rsi
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    shrq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r11, %r12
+; X64-NEXT:    movq %rbx, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rbx
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrq %cl, %rbx
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    cmovaeq %rcx, %rsi
+; X64-NEXT:    xorl %r14d, %r14d
+; X64-NEXT:    leal -256(%r8), %r15d
+; X64-NEXT:    negl %r15d
+; X64-NEXT:    leal -128(%r15), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    shldq %cl, %r13, %rdx
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovbq %r14, %rdx
+; X64-NEXT:    testl %r15d, %r15d
+; X64-NEXT:    cmoveq %r14, %rdx
+; X64-NEXT:    orq %rsi, %rdx
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    leal -128(%r8), %r9d
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movl %r9d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    testb $64, %r9b
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    cmovneq %r14, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $256, %r8d # imm = 0x100
+; X64-NEXT:    cmovaeq %r14, %rdx
+; X64-NEXT:    movq %r10, %r11
+; X64-NEXT:    movq %rdi, %rsi
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %rcx, %rsi
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r13, %rdi
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrdq %cl, %r11, %rdi
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %rbx, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq %r14, %rbx
+; X64-NEXT:    movl %r9d, %ecx
+; X64-NEXT:    shrdq %cl, %r12, %rbx
+; X64-NEXT:    testb $64, %r9b
+; X64-NEXT:    cmovneq %rax, %rbx
+; X64-NEXT:    movb $-128, %r11b
+; X64-NEXT:    subb %r8b, %r11b
+; X64-NEXT:    movq %r14, %r9
+; X64-NEXT:    movl %r11d, %ecx
+; X64-NEXT:    shlq %cl, %r9
+; X64-NEXT:    testb $64, %r11b
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    cmovaeq %rbx, %rax
+; X64-NEXT:    testl %r8d, %r8d
+; X64-NEXT:    cmoveq %r13, %rax
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    leal -256(%r8), %ecx
+; X64-NEXT:    movq %r10, %rbx
+; X64-NEXT:    xorl %esi, %esi
+; X64-NEXT:    shrdq %cl, %rsi, %rbx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rsi, %rbx
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    cmovaeq %rsi, %rbx
+; X64-NEXT:    xorl %r13d, %r13d
+; X64-NEXT:    testl %ecx, %ecx
+; X64-NEXT:    cmoveq %r10, %rbx
+; X64-NEXT:    cmpl $256, %r8d # imm = 0x100
+; X64-NEXT:    cmovbq %rax, %rbx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    shldq %cl, %r10, %rax
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %r13, %rax
+; X64-NEXT:    movq %r12, %rsi
+; X64-NEXT:    movl %r11d, %ecx
+; X64-NEXT:    shldq %cl, %r14, %rsi
+; X64-NEXT:    testb $64, %r11b
+; X64-NEXT:    cmovneq %r9, %rsi
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    testl %r8d, %r8d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    cmoveq %r11, %rsi
+; X64-NEXT:    orq %rax, %rsi
+; X64-NEXT:    cmpl $256, %r8d # imm = 0x100
+; X64-NEXT:    cmovaeq %r13, %rsi
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmovneq %r13, %rdi
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r15b, %cl
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    shrdq %cl, %r13, %r10
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r13, %r10
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %rdi, %r10
+; X64-NEXT:    movq %r14, %r9
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrdq %cl, %r12, %r9
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    testl %r15d, %r15d
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmoveq %rax, %r10
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    cmovaeq %rax, %r9
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    orq %r10, %r9
+; X64-NEXT:    cmpl $256, %r8d # imm = 0x100
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rcx, %r9
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testl %r8d, %r8d
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    cmoveq %r11, %rsi
+; X64-NEXT:    cmoveq %r14, %r9
+; X64-NEXT:    cmoveq %r12, %rdx
+; X64-NEXT:    movq (%rsp), %rax # 8-byte Reload
+; X64-NEXT:    addq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $1, %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    xorl %r11d, %r11d
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    xorl %r10d, %r10d
+; X64-NEXT:    xorl %r15d, %r15d
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    movq %rbp, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB4_4: # %udiv-do-while
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    shldq $1, %r9, %r13
+; X64-NEXT:    shldq $1, %rsi, %r9
+; X64-NEXT:    shldq $1, %rbx, %rsi
+; X64-NEXT:    shrq $63, %rdx
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    leaq (%rbp,%rbx,2), %rbx
+; X64-NEXT:    shldq $1, %r12, %rcx
+; X64-NEXT:    orq %r10, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shldq $1, %rbp, %r12
+; X64-NEXT:    orq %r10, %r12
+; X64-NEXT:    shldq $1, %r14, %rbp
+; X64-NEXT:    orq %rdi, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shrq $63, %rax
+; X64-NEXT:    addq %r14, %r14
+; X64-NEXT:    orq %r11, %r14
+; X64-NEXT:    orl %eax, %r15d
+; X64-NEXT:    # kill: def $r15d killed $r15d killed $r15 def $r15
+; X64-NEXT:    andl $1, %r15d
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    sbbq %rsi, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    sbbq %r9, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    sbbq %r13, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    sbbq %rdx, %rdi
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    negq %rdi
+; X64-NEXT:    movl %edi, %r11d
+; X64-NEXT:    andl $1, %r11d
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    andq (%rsp), %rdi # 8-byte Folded Reload
+; X64-NEXT:    subq %rdi, %rbx
+; X64-NEXT:    sbbq %rbp, %rsi
+; X64-NEXT:    sbbq %rax, %r9
+; X64-NEXT:    sbbq %rdx, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    addq $-1, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rdx
+; X64-NEXT:    adcq $1, %r8
+; X64-NEXT:    andl $1, %r8d
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rbp, %rdx
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rdi, %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    movl $0, %edi
+; X64-NEXT:    movl $0, %r10d
+; X64-NEXT:    movl $0, %r15d
+; X64-NEXT:    movq %r13, %rdx
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    jne .LBB4_4
+; X64-NEXT:    jmp .LBB4_5
+; X64-NEXT:  .LBB4_7:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    xorl %r11d, %r11d
+; X64-NEXT:  .LBB4_5: # %udiv-loop-exit
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    shldq $1, %r12, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    orq %r10, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shldq $1, %rbp, %r12
+; X64-NEXT:    orq %r10, %r12
+; X64-NEXT:    shldq $1, %r14, %rbp
+; X64-NEXT:    orq %r10, %rbp
+; X64-NEXT:    shrq $63, %rcx
+; X64-NEXT:    addq %r14, %r14
+; X64-NEXT:    orq %r11, %r14
+; X64-NEXT:    orl %ecx, %r10d
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r14, %rdi
+; X64-NEXT:    movq %r12, %r11
+; X64-NEXT:  .LBB4_6: # %udiv-end
+; X64-NEXT:    xorq %r13, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    xorq %rax, %r11
+; X64-NEXT:    xorq %rax, %rbp
+; X64-NEXT:    xorq %rax, %rdi
+; X64-NEXT:    subq %rax, %rdi
+; X64-NEXT:    sbbq %rax, %rbp
+; X64-NEXT:    sbbq %rax, %r11
+; X64-NEXT:    sbbq %rax, %rdx
+; X64-NEXT:    sbbq %r13, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %rdi, (%rax)
+; X64-NEXT:    movq %rbp, 8(%rax)
+; X64-NEXT:    movq %r11, 16(%rax)
+; X64-NEXT:    movq %rdx, 24(%rax)
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    movb %r10b, 32(%rax)
+; X64-NEXT:    addq $88, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %res = sdiv i257 %a, %b
+  ret i257 %res
+}
+
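+; The labels recurring in the checks above come from the ExpandLargeDivRem
+; control flow: "_udiv-special-cases" handles the zero/overflow shortcuts and
+; computes the ctlz-based shift counts (the bsr / xor $63 ladders),
+; "udiv-do-while" is the bit-at-a-time restoring-division loop, and
+; "udiv-end" undoes the sign flips for sdiv/srem. As an illustrative sketch
+; only (not the pass's code), the same algorithm on unsigned __int128, since
+; C++ has no i257; the real expansion also skips the leading zero bits up
+; front, which is what the bsrq/cmov sequences above compute:
+;
+;   // Restoring shift-subtract division; one loop trip per dividend bit.
+;   static unsigned __int128 udiv_sketch(unsigned __int128 n,
+;                                        unsigned __int128 d,
+;                                        unsigned __int128 *rem) {
+;     unsigned __int128 q = 0, r = 0;
+;     for (int i = 127; i >= 0; --i) { // "udiv-do-while"
+;       r = (r << 1) | ((n >> i) & 1); // shift in the next dividend bit
+;       if (r >= d) {                  // the wide sbb/cmov compare chains
+;         r -= d;
+;         q |= (unsigned __int128)1 << i;
+;       }
+;     }
+;     if (rem) *rem = r;               // d == 0 is UB, as for the IR op
+;     return q;
+;   }
+;
+;   static __int128 sdiv_sketch(__int128 a, __int128 b) { // "udiv-end"
+;     bool neg = (a < 0) != (b < 0);   // quotient sign
+;     unsigned __int128 ua = a < 0 ? -(unsigned __int128)a : a;
+;     unsigned __int128 ub = b < 0 ? -(unsigned __int128)b : b;
+;     unsigned __int128 q = udiv_sketch(ua, ub, nullptr);
+;     return neg ? -(__int128)q : (__int128)q;
+;   }
+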
+define i1001 @srem1001(i1001 %a, i1001 %b) nounwind {
+; X86-LABEL: srem1001:
+; X86:       # %bb.0: # %_udiv-special-cases
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $1676, %esp # imm = 0x68C
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    shll $23, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    shll $23, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    xorl %eax, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    xorl %eax, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    xorl %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    subl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $511, %esi # imm = 0x1FF
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %eax, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %ecx
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    andl $511, %ebp # imm = 0x1FF
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT:    orl %ebp, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ebp, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    sete %bl
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB5_1
+; X86-NEXT:  # %bb.2: # %_udiv-special-cases
+; X86-NEXT:    bsrl %edx, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    jmp .LBB5_3
+; X86-NEXT:  .LBB5_1:
+; X86-NEXT:    bsrl %eax, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:  .LBB5_3: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    jne .LBB5_4
+; X86-NEXT:  # %bb.5: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    jmp .LBB5_6
+; X86-NEXT:  .LBB5_4:
+; X86-NEXT:    bsrl %edx, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:  .LBB5_6: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_8
+; X86-NEXT:  # %bb.7: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %edx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_8: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    jne .LBB5_9
+; X86-NEXT:  # %bb.10: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    jmp .LBB5_11
+; X86-NEXT:  .LBB5_9:
+; X86-NEXT:    bsrl %edx, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:  .LBB5_11: # %_udiv-special-cases
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    jne .LBB5_12
+; X86-NEXT:  # %bb.13: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    addl $32, %edi
+; X86-NEXT:    jmp .LBB5_14
+; X86-NEXT:  .LBB5_12:
+; X86-NEXT:    bsrl %eax, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:  .LBB5_14: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_16
+; X86-NEXT:  # %bb.15: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %edi
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB5_16: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    jne .LBB5_18
+; X86-NEXT:  # %bb.17: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %edx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_18: # %_udiv-special-cases
+; X86-NEXT:    addl $256, %ebp # imm = 0x100
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb %dl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB5_19
+; X86-NEXT:  # %bb.20: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ecx, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    jmp .LBB5_21
+; X86-NEXT:  .LBB5_19:
+; X86-NEXT:    bsrl %ebx, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:  .LBB5_21: # %_udiv-special-cases
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_22
+; X86-NEXT:  # %bb.23: # %_udiv-special-cases
+; X86-NEXT:    bsrl %edx, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    addl $32, %edi
+; X86-NEXT:    jmp .LBB5_24
+; X86-NEXT:  .LBB5_22:
+; X86-NEXT:    bsrl %edi, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:  .LBB5_24: # %_udiv-special-cases
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    jne .LBB5_26
+; X86-NEXT:  # %bb.25: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_26: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_27
+; X86-NEXT:  # %bb.28: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    addl $32, %edi
+; X86-NEXT:    jmp .LBB5_29
+; X86-NEXT:  .LBB5_27:
+; X86-NEXT:    bsrl %edi, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:  .LBB5_29: # %_udiv-special-cases
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_30
+; X86-NEXT:  # %bb.31: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ebp, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:    jmp .LBB5_32
+; X86-NEXT:  .LBB5_30:
+; X86-NEXT:    bsrl %eax, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:  .LBB5_32: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_34
+; X86-NEXT:  # %bb.33: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebp
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB5_34: # %_udiv-special-cases
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    jne .LBB5_36
+; X86-NEXT:  # %bb.35: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_36: # %_udiv-special-cases
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:    orb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    orl %edi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_38
+; X86-NEXT:  # %bb.37: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_38: # %_udiv-special-cases
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_40
+; X86-NEXT:  # %bb.39: # %_udiv-special-cases
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; X86-NEXT:  .LBB5_40: # %_udiv-special-cases
+; X86-NEXT:    addl $512, %ecx # imm = 0x200
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    testl %ebp, %ebp
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_41
+; X86-NEXT:  # %bb.42: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    jmp .LBB5_43
+; X86-NEXT:  .LBB5_41:
+; X86-NEXT:    bsrl %ebp, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:  .LBB5_43: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB5_44
+; X86-NEXT:  # %bb.45: # %_udiv-special-cases
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    bsrl %edi, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    jmp .LBB5_46
+; X86-NEXT:  .LBB5_44:
+; X86-NEXT:    bsrl %eax, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:  .LBB5_46: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    jne .LBB5_48
+; X86-NEXT:  # %bb.47: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebx
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_48: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB5_49
+; X86-NEXT:  # %bb.50: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    jmp .LBB5_51
+; X86-NEXT:  .LBB5_49:
+; X86-NEXT:    bsrl %eax, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:  .LBB5_51: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB5_52
+; X86-NEXT:  # %bb.53: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:    jmp .LBB5_54
+; X86-NEXT:  .LBB5_52:
+; X86-NEXT:    bsrl %eax, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:  .LBB5_54: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_56
+; X86-NEXT:  # %bb.55: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebp
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB5_56: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    jne .LBB5_58
+; X86-NEXT:  # %bb.57: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %ebx
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_58: # %_udiv-special-cases
+; X86-NEXT:    addl $256, %edx # imm = 0x100
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB5_59
+; X86-NEXT:  # %bb.60: # %_udiv-special-cases
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    bsrl %esi, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    jmp .LBB5_61
+; X86-NEXT:  .LBB5_59:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    bsrl %eax, %edx
+; X86-NEXT:    xorl $31, %edx
+; X86-NEXT:  .LBB5_61: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    jne .LBB5_62
+; X86-NEXT:  # %bb.63: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:    jmp .LBB5_64
+; X86-NEXT:  .LBB5_62:
+; X86-NEXT:    bsrl %ecx, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:  .LBB5_64: # %_udiv-special-cases
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    orl (%esp), %eax # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_66
+; X86-NEXT:  # %bb.65: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebp
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB5_66: # %_udiv-special-cases
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_67
+; X86-NEXT:  # %bb.68: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB5_69
+; X86-NEXT:  .LBB5_67:
+; X86-NEXT:    bsrl %ebx, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB5_69: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    jne .LBB5_70
+; X86-NEXT:  # %bb.71: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:    jmp .LBB5_72
+; X86-NEXT:  .LBB5_70:
+; X86-NEXT:    bsrl %esi, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:  .LBB5_72: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    jne .LBB5_74
+; X86-NEXT:  # %bb.73: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebp
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_74: # %_udiv-special-cases
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    jne .LBB5_76
+; X86-NEXT:  # %bb.75: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_76: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    jne .LBB5_78
+; X86-NEXT:  # %bb.77: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_78: # %_udiv-special-cases
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_80
+; X86-NEXT:  # %bb.79: # %_udiv-special-cases
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB5_80: # %_udiv-special-cases
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_82
+; X86-NEXT:  # %bb.81: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_82: # %_udiv-special-cases
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_84
+; X86-NEXT:  # %bb.83: # %_udiv-special-cases
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_84: # %_udiv-special-cases
+; X86-NEXT:    subl $23, %eax
+; X86-NEXT:    sbbl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl $0, %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_85
+; X86-NEXT:  # %bb.86: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ebx, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    addl $32, %edi
+; X86-NEXT:    jmp .LBB5_87
+; X86-NEXT:  .LBB5_85:
+; X86-NEXT:    bsrl %esi, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:  .LBB5_87: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_88
+; X86-NEXT:  # %bb.89: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB5_90
+; X86-NEXT:  .LBB5_88:
+; X86-NEXT:    bsrl %eax, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB5_90: # %_udiv-special-cases
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    jne .LBB5_92
+; X86-NEXT:  # %bb.91: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_92: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_93
+; X86-NEXT:  # %bb.94: # %_udiv-special-cases
+; X86-NEXT:    bsrl %esi, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB5_95
+; X86-NEXT:  .LBB5_93:
+; X86-NEXT:    bsrl %eax, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB5_95: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    jne .LBB5_96
+; X86-NEXT:  # %bb.97: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_99
+; X86-NEXT:    jmp .LBB5_100
+; X86-NEXT:  .LBB5_96:
+; X86-NEXT:    bsrl %ecx, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_100
+; X86-NEXT:  .LBB5_99: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_100: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    jne .LBB5_102
+; X86-NEXT:  # %bb.101: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_102: # %_udiv-special-cases
+; X86-NEXT:    addl $256, %edi # imm = 0x100
+; X86-NEXT:    setb %al
+; X86-NEXT:    testl %ebp, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB5_103
+; X86-NEXT:  # %bb.104: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ecx, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    jmp .LBB5_105
+; X86-NEXT:  .LBB5_103:
+; X86-NEXT:    bsrl %ebp, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:  .LBB5_105: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_106
+; X86-NEXT:  # %bb.107: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    jmp .LBB5_108
+; X86-NEXT:  .LBB5_106:
+; X86-NEXT:    bsrl %eax, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:  .LBB5_108: # %_udiv-special-cases
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    orl %ebp, %edi
+; X86-NEXT:    jne .LBB5_110
+; X86-NEXT:  # %bb.109: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB5_110: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_111
+; X86-NEXT:  # %bb.112: # %_udiv-special-cases
+; X86-NEXT:    bsrl %edx, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    je .LBB5_115
+; X86-NEXT:  .LBB5_114:
+; X86-NEXT:    bsrl %ecx, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    jmp .LBB5_116
+; X86-NEXT:  .LBB5_111:
+; X86-NEXT:    bsrl %eax, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    jne .LBB5_114
+; X86-NEXT:  .LBB5_115: # %_udiv-special-cases
+; X86-NEXT:    bsrl %edi, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    addl $32, %edi
+; X86-NEXT:  .LBB5_116: # %_udiv-special-cases
+; X86-NEXT:    movl %ebx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_118
+; X86-NEXT:  # %bb.117: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %edi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_118: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    orl %edi, %ebp
+; X86-NEXT:    jne .LBB5_120
+; X86-NEXT:  # %bb.119: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %esi
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:  .LBB5_120: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    orl %edi, %ebp
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_122
+; X86-NEXT:  # %bb.121: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_122: # %_udiv-special-cases
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_124
+; X86-NEXT:  # %bb.123: # %_udiv-special-cases
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:  .LBB5_124: # %_udiv-special-cases
+; X86-NEXT:    addl $512, %esi # imm = 0x200
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_125
+; X86-NEXT:  # %bb.126: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ebp, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    addl $32, %edi
+; X86-NEXT:    jmp .LBB5_127
+; X86-NEXT:  .LBB5_125:
+; X86-NEXT:    bsrl %ecx, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:  .LBB5_127: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_128
+; X86-NEXT:  # %bb.129: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    jmp .LBB5_130
+; X86-NEXT:  .LBB5_128:
+; X86-NEXT:    bsrl %eax, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:  .LBB5_130: # %_udiv-special-cases
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    orl %ecx, %ebx
+; X86-NEXT:    jne .LBB5_132
+; X86-NEXT:  # %bb.131: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_132: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_133
+; X86-NEXT:  # %bb.134: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ebp, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    je .LBB5_137
+; X86-NEXT:  .LBB5_136:
+; X86-NEXT:    bsrl %ecx, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_139
+; X86-NEXT:    jmp .LBB5_140
+; X86-NEXT:  .LBB5_133:
+; X86-NEXT:    bsrl %eax, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    jne .LBB5_136
+; X86-NEXT:  .LBB5_137: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_140
+; X86-NEXT:  .LBB5_139: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB5_140: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    jne .LBB5_142
+; X86-NEXT:  # %bb.141: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_142: # %_udiv-special-cases
+; X86-NEXT:    addl $256, %edi # imm = 0x100
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_143
+; X86-NEXT:  # %bb.144: # %_udiv-special-cases
+; X86-NEXT:    bsrl %ebp, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    addl $32, %edi
+; X86-NEXT:    jmp .LBB5_145
+; X86-NEXT:  .LBB5_143:
+; X86-NEXT:    bsrl %eax, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:  .LBB5_145: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB5_146
+; X86-NEXT:  # %bb.147: # %_udiv-special-cases
+; X86-NEXT:    bsrl %edx, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_149
+; X86-NEXT:    jmp .LBB5_150
+; X86-NEXT:  .LBB5_146:
+; X86-NEXT:    bsrl %eax, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_150
+; X86-NEXT:  .LBB5_149: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_150: # %_udiv-special-cases
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_151
+; X86-NEXT:  # %bb.152: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB5_155
+; X86-NEXT:  .LBB5_154:
+; X86-NEXT:    bsrl %eax, %ebp
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    jmp .LBB5_156
+; X86-NEXT:  .LBB5_151:
+; X86-NEXT:    bsrl %ecx, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB5_154
+; X86-NEXT:  .LBB5_155: # %_udiv-special-cases
+; X86-NEXT:    bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    xorl $31, %ebp
+; X86-NEXT:    addl $32, %ebp
+; X86-NEXT:  .LBB5_156: # %_udiv-special-cases
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    jne .LBB5_158
+; X86-NEXT:  # %bb.157: # %_udiv-special-cases
+; X86-NEXT:    addl $64, %ebp
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB5_158: # %_udiv-special-cases
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edi, %ebp
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    jne .LBB5_160
+; X86-NEXT:  # %bb.159: # %_udiv-special-cases
+; X86-NEXT:    subl $-128, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_160: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    orl %edi, %ebp
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    jne .LBB5_162
+; X86-NEXT:  # %bb.161: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_162: # %_udiv-special-cases
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_164
+; X86-NEXT:  # %bb.163: # %_udiv-special-cases
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:  .LBB5_164: # %_udiv-special-cases
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_166
+; X86-NEXT:  # %bb.165: # %_udiv-special-cases
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_166: # %_udiv-special-cases
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_168
+; X86-NEXT:  # %bb.167: # %_udiv-special-cases
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_168: # %_udiv-special-cases
+; X86-NEXT:    subl $23, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl $0, %esi
+; X86-NEXT:    sbbl $0, %edi
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    subl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl %ebp, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl (%esp), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $1000, %eax # imm = 0x3E8
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl %ecx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    setb %al
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    orb %al, %cl
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_170
+; X86-NEXT:  # %bb.169: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_170: # %_udiv-special-cases
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_171
+; X86-NEXT:  # %bb.4797: # %_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    andl $511, %eax # imm = 0x1FF
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    xorl $1000, %esi # imm = 0x3E8
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_4798
+; X86-NEXT:  # %bb.2496: # %udiv-bb1
+; X86-NEXT:    movl $1000, %ecx # imm = 0x3E8
+; X86-NEXT:    subl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2498
+; X86-NEXT:  # %bb.2497: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:  .LBB5_2498: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2500
+; X86-NEXT:  # %bb.2499: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2500: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2502
+; X86-NEXT:  # %bb.2501: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2502: # %udiv-bb1
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_2504
+; X86-NEXT:  # %bb.2503: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_2504: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2506
+; X86-NEXT:  # %bb.2505: # %udiv-bb1
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_2506: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2508
+; X86-NEXT:  # %bb.2507: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2508: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2510
+; X86-NEXT:  # %bb.2509: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_2510: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2512
+; X86-NEXT:  # %bb.2511: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_2512: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2514
+; X86-NEXT:  # %bb.2513: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_2514: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_2516
+; X86-NEXT:  # %bb.2515: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_2516: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2518
+; X86-NEXT:  # %bb.2517: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:  .LBB5_2518: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2520
+; X86-NEXT:  # %bb.2519: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2520: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_2522
+; X86-NEXT:  # %bb.2521: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB5_2522: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2524
+; X86-NEXT:  # %bb.2523: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2524: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2526
+; X86-NEXT:  # %bb.2525: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2526: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2528
+; X86-NEXT:  # %bb.2527: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2528: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2530
+; X86-NEXT:  # %bb.2529: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2530: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2532
+; X86-NEXT:  # %bb.2531: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2532: # %udiv-bb1
+; X86-NEXT:    movl $744, %ecx # imm = 0x2E8
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    subl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2534
+; X86-NEXT:  # %bb.2533: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2534: # %udiv-bb1
+; X86-NEXT:    movl $872, %eax # imm = 0x368
+; X86-NEXT:    subl %edi, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2536
+; X86-NEXT:  # %bb.2535: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_2536: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2538
+; X86-NEXT:  # %bb.2537: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2538: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2540
+; X86-NEXT:  # %bb.2539: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_2540: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2542
+; X86-NEXT:  # %bb.2541: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2542: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2544
+; X86-NEXT:  # %bb.2543: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2544: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_2546
+; X86-NEXT:  # %bb.2545: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2546: # %udiv-bb1
+; X86-NEXT:    movb $64, %ch
+; X86-NEXT:    subb %dl, %ch
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    jne .LBB5_2548
+; X86-NEXT:  # %bb.2547: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_2548: # %udiv-bb1
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2550
+; X86-NEXT:  # %bb.2549: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_2550: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_2552
+; X86-NEXT:  # %bb.2551:
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2552: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2554
+; X86-NEXT:  # %bb.2553: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2554: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    jne .LBB5_2556
+; X86-NEXT:  # %bb.2555: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2556: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_2558
+; X86-NEXT:  # %bb.2557:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2558: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2560
+; X86-NEXT:  # %bb.2559: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_2560: # %udiv-bb1
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    jne .LBB5_2562
+; X86-NEXT:  # %bb.2561: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2562: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_2564
+; X86-NEXT:  # %bb.2563:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:  .LBB5_2564: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl $384, %ecx # imm = 0x180
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2566
+; X86-NEXT:  # %bb.2565: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2566: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jb .LBB5_2568
+; X86-NEXT:  # %bb.2567: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2568: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2570
+; X86-NEXT:  # %bb.2569: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2570: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2572
+; X86-NEXT:  # %bb.2571: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2572: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2574
+; X86-NEXT:  # %bb.2573: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2574: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2576
+; X86-NEXT:  # %bb.2575: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_2576: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_2578
+; X86-NEXT:  # %bb.2577:
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2578: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_2580
+; X86-NEXT:  # %bb.2579:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2580: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2582
+; X86-NEXT:  # %bb.2581: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2582: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2584
+; X86-NEXT:  # %bb.2583: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2584: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2586
+; X86-NEXT:  # %bb.2585: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2586: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2588
+; X86-NEXT:  # %bb.2587: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_2588: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_2590
+; X86-NEXT:  # %bb.2589:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2590: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2592
+; X86-NEXT:  # %bb.2591: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2592: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2594
+; X86-NEXT:  # %bb.2593: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2594: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2596
+; X86-NEXT:  # %bb.2595: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2596: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_2598
+; X86-NEXT:  # %bb.2597: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2598: # %udiv-bb1
+; X86-NEXT:    addl $-512, %ecx # imm = 0xFE00
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl $-256, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2600
+; X86-NEXT:  # %bb.2599: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_2600: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $488, %ebx # imm = 0x1E8
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2602
+; X86-NEXT:  # %bb.2601: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2602: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2604
+; X86-NEXT:  # %bb.2603: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2604: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2606
+; X86-NEXT:  # %bb.2605: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2606: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $232, %ecx
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2608
+; X86-NEXT:  # %bb.2607: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2608: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jb .LBB5_2610
+; X86-NEXT:  # %bb.2609: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2610: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2612
+; X86-NEXT:  # %bb.2611: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2612: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jb .LBB5_2614
+; X86-NEXT:  # %bb.2613: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2614: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2616
+; X86-NEXT:  # %bb.2615: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2616: # %udiv-bb1
+; X86-NEXT:    leal -256(%edx), %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl $-128, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_2618
+; X86-NEXT:  # %bb.2617: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2618: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2620
+; X86-NEXT:  # %bb.2619: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2620: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2622
+; X86-NEXT:  # %bb.2621: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2622: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2624
+; X86-NEXT:  # %bb.2623: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_2624: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2626
+; X86-NEXT:  # %bb.2625: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB5_2626: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2628
+; X86-NEXT:  # %bb.2627: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2628: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $360, %ecx # imm = 0x168
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2630
+; X86-NEXT:  # %bb.2629: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2630: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    jne .LBB5_2632
+; X86-NEXT:  # %bb.2631: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2632: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    jne .LBB5_2634
+; X86-NEXT:  # %bb.2633: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2634: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2636
+; X86-NEXT:  # %bb.2635: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_2636: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_2637
+; X86-NEXT:  # %bb.2638: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_2639
+; X86-NEXT:  .LBB5_171:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_4798
+; X86-NEXT:  .LBB5_2637:
+; X86-NEXT:    orl %edx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2639: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl $640, %ebx # imm = 0x280
+; X86-NEXT:    subl %ecx, %ebx
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2641
+; X86-NEXT:  # %bb.2640: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2641: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2643
+; X86-NEXT:  # %bb.2642: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_2643: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2645
+; X86-NEXT:  # %bb.2644: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2645: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB5_2646
+; X86-NEXT:  # %bb.2647: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_2648
+; X86-NEXT:  .LBB5_2646:
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2648: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2650
+; X86-NEXT:  # %bb.2649: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2650: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    leal -128(%ecx), %ebx
+; X86-NEXT:    negl %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2652
+; X86-NEXT:  # %bb.2651: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2652: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2654
+; X86-NEXT:  # %bb.2653: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_2654: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2656
+; X86-NEXT:  # %bb.2655:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:  .LBB5_2656: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2658
+; X86-NEXT:  # %bb.2657: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2658: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $64, %ch
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    subb %bl, %ch
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2660
+; X86-NEXT:  # %bb.2659: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2660: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb %bl, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %esi
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2662
+; X86-NEXT:  # %bb.2661: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB5_2662: # %udiv-bb1
+; X86-NEXT:    movb %bl, %cl
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2664
+; X86-NEXT:  # %bb.2663: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2664: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_2665
+; X86-NEXT:  # %bb.2666: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB5_2667
+; X86-NEXT:  .LBB5_2665:
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2667: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2669
+; X86-NEXT:  # %bb.2668: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2669: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2671
+; X86-NEXT:  # %bb.2670: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2671: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_2673
+; X86-NEXT:  # %bb.2672: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2673: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_2675
+; X86-NEXT:  # %bb.2674:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2675: # %udiv-bb1
+; X86-NEXT:    movb %bl, %cl
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_2677
+; X86-NEXT:  # %bb.2676: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_2677: # %udiv-bb1
+; X86-NEXT:    subl $128, %eax
+; X86-NEXT:    jb .LBB5_2679
+; X86-NEXT:  # %bb.2678: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2679: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %esi
+; X86-NEXT:    jb .LBB5_2681
+; X86-NEXT:  # %bb.2680: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2681: # %udiv-bb1
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    jne .LBB5_2683
+; X86-NEXT:  # %bb.2682: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_2683: # %udiv-bb1
+; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    jne .LBB5_2685
+; X86-NEXT:  # %bb.2684: # %udiv-bb1
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB5_2685: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2687
+; X86-NEXT:  # %bb.2686: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_2687: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2689
+; X86-NEXT:  # %bb.2688:
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_2689: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_2691
+; X86-NEXT:  # %bb.2690: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_2691: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB5_2693
+; X86-NEXT:  # %bb.2692:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2693: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2695
+; X86-NEXT:  # %bb.2694: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_2695: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2697
+; X86-NEXT:  # %bb.2696: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB5_2697: # %udiv-bb1
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_2699
+; X86-NEXT:  # %bb.2698: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2699: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2701
+; X86-NEXT:  # %bb.2700: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_2701: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2703
+; X86-NEXT:  # %bb.2702: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2703: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2705
+; X86-NEXT:  # %bb.2704: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2705: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2707
+; X86-NEXT:  # %bb.2706: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2707: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2709
+; X86-NEXT:  # %bb.2708: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2709: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2711
+; X86-NEXT:  # %bb.2710: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2711: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2713
+; X86-NEXT:  # %bb.2712:
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_2713: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2715
+; X86-NEXT:  # %bb.2714: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2715: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2717
+; X86-NEXT:  # %bb.2716: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2717: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_2719
+; X86-NEXT:  # %bb.2718:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2719: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_2721
+; X86-NEXT:  # %bb.2720:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2721: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2723
+; X86-NEXT:  # %bb.2722: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_2723: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2725
+; X86-NEXT:  # %bb.2724: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2725: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2727
+; X86-NEXT:  # %bb.2726:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2727: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2729
+; X86-NEXT:  # %bb.2728: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2729: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    je .LBB5_2731
+; X86-NEXT:  # %bb.2730: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2731: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2732
+; X86-NEXT:  # %bb.2733: # %udiv-bb1
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    jne .LBB5_2734
+; X86-NEXT:  .LBB5_2735: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jb .LBB5_2736
+; X86-NEXT:  .LBB5_2737: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB5_2738
+; X86-NEXT:  .LBB5_2732: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    je .LBB5_2735
+; X86-NEXT:  .LBB5_2734: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB5_2737
+; X86-NEXT:  .LBB5_2736:
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2738: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2740
+; X86-NEXT:  # %bb.2739: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:  .LBB5_2740: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_2742
+; X86-NEXT:  # %bb.2741: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2742: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2744
+; X86-NEXT:  # %bb.2743: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2744: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB5_2746
+; X86-NEXT:  # %bb.2745: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2746: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %ebx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    jne .LBB5_2748
+; X86-NEXT:  # %bb.2747: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_2748: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %esi
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jne .LBB5_2750
+; X86-NEXT:  # %bb.2749: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2750: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2752
+; X86-NEXT:  # %bb.2751:
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_2752: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2754
+; X86-NEXT:  # %bb.2753: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_2754: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2756
+; X86-NEXT:  # %bb.2755: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2756: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2758
+; X86-NEXT:  # %bb.2757: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2758: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2760
+; X86-NEXT:  # %bb.2759:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2760: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2762
+; X86-NEXT:  # %bb.2761: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2762: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_2764
+; X86-NEXT:  # %bb.2763: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_2764: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2766
+; X86-NEXT:  # %bb.2765: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_2766: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2768
+; X86-NEXT:  # %bb.2767:
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2768: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2770
+; X86-NEXT:  # %bb.2769: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2770: # %udiv-bb1
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB5_2772
+; X86-NEXT:  # %bb.2771: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2772: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    je .LBB5_2774
+; X86-NEXT:  # %bb.2773: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2774: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_2776
+; X86-NEXT:  # %bb.2775: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2776: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2778
+; X86-NEXT:  # %bb.2777: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2778: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_2780
+; X86-NEXT:  # %bb.2779:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2780: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    jne .LBB5_2782
+; X86-NEXT:  # %bb.2781: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2782: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2784
+; X86-NEXT:  # %bb.2783:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2784: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2786
+; X86-NEXT:  # %bb.2785: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2786: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2788
+; X86-NEXT:  # %bb.2787: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_2788: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_2790
+; X86-NEXT:  # %bb.2789: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_2790: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2791
+; X86-NEXT:  # %bb.2792: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_2793
+; X86-NEXT:  .LBB5_2791:
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2793: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2795
+; X86-NEXT:  # %bb.2794: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2795: # %udiv-bb1
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_2797
+; X86-NEXT:  # %bb.2796: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_2797: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2799
+; X86-NEXT:  # %bb.2798: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_2799: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2801
+; X86-NEXT:  # %bb.2800: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2801: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2803
+; X86-NEXT:  # %bb.2802: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2803: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2805
+; X86-NEXT:  # %bb.2804: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2805: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2807
+; X86-NEXT:  # %bb.2806: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2807: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2809
+; X86-NEXT:  # %bb.2808: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2809: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2811
+; X86-NEXT:  # %bb.2810: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2811: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_2813
+; X86-NEXT:  # %bb.2812: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB5_2813: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2815
+; X86-NEXT:  # %bb.2814: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2815: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2817
+; X86-NEXT:  # %bb.2816: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_2817: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2819
+; X86-NEXT:  # %bb.2818: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_2819: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2821
+; X86-NEXT:  # %bb.2820: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2821: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2823
+; X86-NEXT:  # %bb.2822: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2823: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2825
+; X86-NEXT:  # %bb.2824: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2825: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2827
+; X86-NEXT:  # %bb.2826: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_2827: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2829
+; X86-NEXT:  # %bb.2828: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2829: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $616, %ecx # imm = 0x268
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2831
+; X86-NEXT:  # %bb.2830: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2831: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2833
+; X86-NEXT:  # %bb.2832: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_2833: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2835
+; X86-NEXT:  # %bb.2834: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_2835: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2837
+; X86-NEXT:  # %bb.2836: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2837: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    je .LBB5_2838
+; X86-NEXT:  # %bb.2839: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2840
+; X86-NEXT:  .LBB5_2841: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2843
+; X86-NEXT:  .LBB5_2842: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2843: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2845
+; X86-NEXT:  # %bb.2844: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_2845: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2847
+; X86-NEXT:  # %bb.2846: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2847: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jb .LBB5_2849
+; X86-NEXT:  # %bb.2848: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2849: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    je .LBB5_2850
+; X86-NEXT:  # %bb.2851: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jae .LBB5_2852
+; X86-NEXT:  .LBB5_2853: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_2854
+; X86-NEXT:  .LBB5_2855: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jb .LBB5_2857
+; X86-NEXT:  .LBB5_2856: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2857: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2859
+; X86-NEXT:  # %bb.2858: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2859: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2861
+; X86-NEXT:  # %bb.2860: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_2861: # %udiv-bb1
+; X86-NEXT:    movl $768, %ecx # imm = 0x300
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    subl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl $-128, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_2863
+; X86-NEXT:  # %bb.2862: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2863: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_2865
+; X86-NEXT:  # %bb.2864: # %udiv-bb1
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_2865: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_2867
+; X86-NEXT:  # %bb.2866: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_2867: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_2868
+; X86-NEXT:  # %bb.2869: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB5_2870
+; X86-NEXT:  .LBB5_2838: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2841
+; X86-NEXT:  .LBB5_2840: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2842
+; X86-NEXT:    jmp .LBB5_2843
+; X86-NEXT:  .LBB5_2850: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jb .LBB5_2853
+; X86-NEXT:  .LBB5_2852: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_2855
+; X86-NEXT:  .LBB5_2854: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jae .LBB5_2856
+; X86-NEXT:    jmp .LBB5_2857
+; X86-NEXT:  .LBB5_2868:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:  .LBB5_2870: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, %edx # imm = 0x100
+; X86-NEXT:    jb .LBB5_2872
+; X86-NEXT:  # %bb.2871: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2872: # %udiv-bb1
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2874
+; X86-NEXT:  # %bb.2873: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2874: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2876
+; X86-NEXT:  # %bb.2875: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_2876: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2878
+; X86-NEXT:  # %bb.2877:
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2878: # %udiv-bb1
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_2880
+; X86-NEXT:  # %bb.2879: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2880: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2882
+; X86-NEXT:  # %bb.2881: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2882: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2884
+; X86-NEXT:  # %bb.2883:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2884: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2886
+; X86-NEXT:  # %bb.2885: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2886: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2888
+; X86-NEXT:  # %bb.2887: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2888: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB5_2890
+; X86-NEXT:  # %bb.2889: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2890: # %udiv-bb1
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2892
+; X86-NEXT:  # %bb.2891: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_2892: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    jne .LBB5_2894
+; X86-NEXT:  # %bb.2893: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_2894: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2896
+; X86-NEXT:  # %bb.2895: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2896: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2898
+; X86-NEXT:  # %bb.2897: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_2898: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_2900
+; X86-NEXT:  # %bb.2899:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2900: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2902
+; X86-NEXT:  # %bb.2901: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2902: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2904
+; X86-NEXT:  # %bb.2903: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_2904: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2906
+; X86-NEXT:  # %bb.2905:
+; X86-NEXT:    orl %ecx, %ebx
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_2906: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2908
+; X86-NEXT:  # %bb.2907: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_2908: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_2910
+; X86-NEXT:  # %bb.2909: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB5_2910: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2912
+; X86-NEXT:  # %bb.2911: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2912: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_2914
+; X86-NEXT:  # %bb.2913: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_2914: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2916
+; X86-NEXT:  # %bb.2915: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2916: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2918
+; X86-NEXT:  # %bb.2917: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2918: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2920
+; X86-NEXT:  # %bb.2919: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2920: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2922
+; X86-NEXT:  # %bb.2921: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2922: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2924
+; X86-NEXT:  # %bb.2923: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2924: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2926
+; X86-NEXT:  # %bb.2925: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2926: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2928
+; X86-NEXT:  # %bb.2927: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2928: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2930
+; X86-NEXT:  # %bb.2929: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2930: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2932
+; X86-NEXT:  # %bb.2931: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2932: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2934
+; X86-NEXT:  # %bb.2933: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_2934: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2936
+; X86-NEXT:  # %bb.2935: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2936: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2938
+; X86-NEXT:  # %bb.2937: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_2938: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jne .LBB5_2940
+; X86-NEXT:  # %bb.2939: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB5_2940: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb %dl, %cl
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2942
+; X86-NEXT:  # %bb.2941: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2942: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2944
+; X86-NEXT:  # %bb.2943: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_2944: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2946
+; X86-NEXT:  # %bb.2945: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2946: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB5_2948
+; X86-NEXT:  # %bb.2947: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_2948: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2950
+; X86-NEXT:  # %bb.2949: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2950: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2952
+; X86-NEXT:  # %bb.2951: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_2952: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_2954
+; X86-NEXT:  # %bb.2953: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2954: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2956
+; X86-NEXT:  # %bb.2955: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_2956: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2958
+; X86-NEXT:  # %bb.2957: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2958: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2960
+; X86-NEXT:  # %bb.2959: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2960: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_2962
+; X86-NEXT:  # %bb.2961: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_2962: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2964
+; X86-NEXT:  # %bb.2963: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2964: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2966
+; X86-NEXT:  # %bb.2965: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2966: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jne .LBB5_2968
+; X86-NEXT:  # %bb.2967: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2968: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2970
+; X86-NEXT:  # %bb.2969: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2970: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2972
+; X86-NEXT:  # %bb.2971: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2972: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2974
+; X86-NEXT:  # %bb.2973: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2974: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2976
+; X86-NEXT:  # %bb.2975: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2976: # %udiv-bb1
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_2978
+; X86-NEXT:  # %bb.2977: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2978: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:    jb .LBB5_2980
+; X86-NEXT:  # %bb.2979: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2980: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    je .LBB5_2981
+; X86-NEXT:  # %bb.2982: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_2983
+; X86-NEXT:  .LBB5_2984: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2986
+; X86-NEXT:  .LBB5_2985: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2986: # %udiv-bb1
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2988
+; X86-NEXT:  # %bb.2987: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2988: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2990
+; X86-NEXT:  # %bb.2989: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2990: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2992
+; X86-NEXT:  # %bb.2991: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2992: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2994
+; X86-NEXT:  # %bb.2993: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_2994: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    jne .LBB5_2996
+; X86-NEXT:  # %bb.2995: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2996: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_2998
+; X86-NEXT:  # %bb.2997:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2998: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3000
+; X86-NEXT:  # %bb.2999: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3000: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3002
+; X86-NEXT:  # %bb.3001: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3002: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3004
+; X86-NEXT:  # %bb.3003: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3004: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3006
+; X86-NEXT:  # %bb.3005: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3006: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3008
+; X86-NEXT:  # %bb.3007: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3008: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    jne .LBB5_3010
+; X86-NEXT:  # %bb.3009: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3010: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3012
+; X86-NEXT:  # %bb.3011: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3012: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    jne .LBB5_3014
+; X86-NEXT:  # %bb.3013: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3014: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3016
+; X86-NEXT:  # %bb.3015: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3016: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3018
+; X86-NEXT:  # %bb.3017: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_3018: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3020
+; X86-NEXT:  # %bb.3019: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3020: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3022
+; X86-NEXT:  # %bb.3021: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3022: # %udiv-bb1
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3024
+; X86-NEXT:  # %bb.3023: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3024: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_3026
+; X86-NEXT:  # %bb.3025: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3026: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3028
+; X86-NEXT:  # %bb.3027: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3028: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3030
+; X86-NEXT:  # %bb.3029:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3030: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB5_3032
+; X86-NEXT:  # %bb.3031: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_3032: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3034
+; X86-NEXT:  # %bb.3033: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3034: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3036
+; X86-NEXT:  # %bb.3035: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3036: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3038
+; X86-NEXT:  # %bb.3037: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3038: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_3040
+; X86-NEXT:  # %bb.3039:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3040: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3042
+; X86-NEXT:  # %bb.3041: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3042: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3044
+; X86-NEXT:  # %bb.3043: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3044: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3046
+; X86-NEXT:  # %bb.3045: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3046: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3048
+; X86-NEXT:  # %bb.3047: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_3048: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_3049
+; X86-NEXT:  # %bb.3050: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_3051
+; X86-NEXT:  .LBB5_2981: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_2984
+; X86-NEXT:  .LBB5_2983: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2985
+; X86-NEXT:    jmp .LBB5_2986
+; X86-NEXT:  .LBB5_3049:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3051: # %udiv-bb1
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3053
+; X86-NEXT:  # %bb.3052: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3053: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3055
+; X86-NEXT:  # %bb.3054:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB5_3055: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3057
+; X86-NEXT:  # %bb.3056: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3057: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3059
+; X86-NEXT:  # %bb.3058: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_3059: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    jne .LBB5_3061
+; X86-NEXT:  # %bb.3060: # %udiv-bb1
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_3061: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    je .LBB5_3063
+; X86-NEXT:  # %bb.3062: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3063: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3065
+; X86-NEXT:  # %bb.3064: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3065: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_3067
+; X86-NEXT:  # %bb.3066:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3067: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3069
+; X86-NEXT:  # %bb.3068: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3069: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_3070
+; X86-NEXT:  # %bb.3071: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3072
+; X86-NEXT:  .LBB5_3073: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3075
+; X86-NEXT:  .LBB5_3074: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3075: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3077
+; X86-NEXT:  # %bb.3076: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3077: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    subl $128, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_3079
+; X86-NEXT:  # %bb.3078: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3079: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    jne .LBB5_3081
+; X86-NEXT:  # %bb.3080: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3081: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3083
+; X86-NEXT:  # %bb.3082: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3083: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3085
+; X86-NEXT:  # %bb.3084: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3085: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3087
+; X86-NEXT:  # %bb.3086: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3087: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3089
+; X86-NEXT:  # %bb.3088: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3089: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_3090
+; X86-NEXT:  # %bb.3091: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3093
+; X86-NEXT:    jmp .LBB5_3094
+; X86-NEXT:  .LBB5_3070: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3073
+; X86-NEXT:  .LBB5_3072:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3074
+; X86-NEXT:    jmp .LBB5_3075
+; X86-NEXT:  .LBB5_3090:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3094
+; X86-NEXT:  .LBB5_3093: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3094: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3096
+; X86-NEXT:  # %bb.3095: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_3096: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3097
+; X86-NEXT:  # %bb.3098: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_3099
+; X86-NEXT:  .LBB5_3100: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3102
+; X86-NEXT:  .LBB5_3101:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_3102: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3104
+; X86-NEXT:  # %bb.3103: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3104: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_3106
+; X86-NEXT:  # %bb.3105: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_3106: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3108
+; X86-NEXT:  # %bb.3107: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_3108: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3110
+; X86-NEXT:  # %bb.3109: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_3110: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3112
+; X86-NEXT:  # %bb.3111: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3112: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3114
+; X86-NEXT:  # %bb.3113: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3114: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3116
+; X86-NEXT:  # %bb.3115: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3116: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3118
+; X86-NEXT:  # %bb.3117: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3118: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3119
+; X86-NEXT:  # %bb.3120: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_3121
+; X86-NEXT:  .LBB5_3122: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_3124
+; X86-NEXT:  .LBB5_3123: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3124: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3125
+; X86-NEXT:  # %bb.3126: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3127
+; X86-NEXT:  .LBB5_3128: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3130
+; X86-NEXT:  .LBB5_3129: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3130: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3132
+; X86-NEXT:  # %bb.3131:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3132: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3134
+; X86-NEXT:  # %bb.3133: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3134: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3136
+; X86-NEXT:  # %bb.3135: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_3136: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jne .LBB5_3138
+; X86-NEXT:  # %bb.3137: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3138: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3140
+; X86-NEXT:  # %bb.3139:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3140: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3142
+; X86-NEXT:  # %bb.3141:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3142: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3144
+; X86-NEXT:  # %bb.3143: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_3144: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3146
+; X86-NEXT:  # %bb.3145: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3146: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3148
+; X86-NEXT:  # %bb.3147:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3148: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_3150
+; X86-NEXT:  # %bb.3149:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3150: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3152
+; X86-NEXT:  # %bb.3151: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3152: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3154
+; X86-NEXT:  # %bb.3153: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3154: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3156
+; X86-NEXT:  # %bb.3155: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_3156: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3158
+; X86-NEXT:  # %bb.3157: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_3158: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3159
+; X86-NEXT:  # %bb.3160: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB5_3161
+; X86-NEXT:  .LBB5_3097: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_3100
+; X86-NEXT:  .LBB5_3099: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3101
+; X86-NEXT:    jmp .LBB5_3102
+; X86-NEXT:  .LBB5_3119: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_3122
+; X86-NEXT:  .LBB5_3121: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_3123
+; X86-NEXT:    jmp .LBB5_3124
+; X86-NEXT:  .LBB5_3125: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3128
+; X86-NEXT:  .LBB5_3127: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3129
+; X86-NEXT:    jmp .LBB5_3130
+; X86-NEXT:  .LBB5_3159:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:  .LBB5_3161: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3163
+; X86-NEXT:  # %bb.3162: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3163: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_3165
+; X86-NEXT:  # %bb.3164: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3165: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3167
+; X86-NEXT:  # %bb.3166: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3167: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ebp
+; X86-NEXT:    jae .LBB5_3169
+; X86-NEXT:  # %bb.3168:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3169: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    je .LBB5_3171
+; X86-NEXT:  # %bb.3170: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_3171: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ebp
+; X86-NEXT:    jb .LBB5_3172
+; X86-NEXT:  # %bb.3173: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_3174
+; X86-NEXT:  .LBB5_3172:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3174: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_3175
+; X86-NEXT:  # %bb.3176: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3177
+; X86-NEXT:  .LBB5_3178: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3180
+; X86-NEXT:  .LBB5_3179:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3180: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_3182
+; X86-NEXT:  # %bb.3181: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3182: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3183
+; X86-NEXT:  # %bb.3184: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3185
+; X86-NEXT:  .LBB5_3186: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3187
+; X86-NEXT:  .LBB5_3188: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3190
+; X86-NEXT:  .LBB5_3189: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3190: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3192
+; X86-NEXT:  # %bb.3191: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3192: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ebp
+; X86-NEXT:    jb .LBB5_3194
+; X86-NEXT:  # %bb.3193: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3194: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3196
+; X86-NEXT:  # %bb.3195: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3196: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3198
+; X86-NEXT:  # %bb.3197: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3198: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3200
+; X86-NEXT:  # %bb.3199:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3200: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_3201
+; X86-NEXT:  # %bb.3202: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3203
+; X86-NEXT:  .LBB5_3204: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_3206
+; X86-NEXT:  .LBB5_3205:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3206: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3208
+; X86-NEXT:  # %bb.3207: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_3208: # %udiv-bb1
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_3210
+; X86-NEXT:  # %bb.3209: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3210: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3212
+; X86-NEXT:  # %bb.3211: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3212: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3214
+; X86-NEXT:  # %bb.3213:
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_3214: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    je .LBB5_3216
+; X86-NEXT:  # %bb.3215: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_3216: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %esi
+; X86-NEXT:    jb .LBB5_3217
+; X86-NEXT:  # %bb.3218: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3220
+; X86-NEXT:    jmp .LBB5_3221
+; X86-NEXT:  .LBB5_3175: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3178
+; X86-NEXT:  .LBB5_3177:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3179
+; X86-NEXT:    jmp .LBB5_3180
+; X86-NEXT:  .LBB5_3183: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3186
+; X86-NEXT:  .LBB5_3185: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3188
+; X86-NEXT:  .LBB5_3187: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3189
+; X86-NEXT:    jmp .LBB5_3190
+; X86-NEXT:  .LBB5_3201: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3204
+; X86-NEXT:  .LBB5_3203: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_3205
+; X86-NEXT:    jmp .LBB5_3206
+; X86-NEXT:  .LBB5_3217:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3221
+; X86-NEXT:  .LBB5_3220:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3221: # %udiv-bb1
+; X86-NEXT:    testl %ebp, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_3223
+; X86-NEXT:  # %bb.3222: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3223: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3225
+; X86-NEXT:  # %bb.3224:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3225: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    je .LBB5_3227
+; X86-NEXT:  # %bb.3226: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3227: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    je .LBB5_3229
+; X86-NEXT:  # %bb.3228: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3229: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3231
+; X86-NEXT:  # %bb.3230: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3231: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3233
+; X86-NEXT:  # %bb.3232:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3233: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    je .LBB5_3235
+; X86-NEXT:  # %bb.3234: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3235: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3237
+; X86-NEXT:  # %bb.3236: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_3237: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    je .LBB5_3239
+; X86-NEXT:  # %bb.3238: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3239: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3241
+; X86-NEXT:  # %bb.3240: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3241: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3243
+; X86-NEXT:  # %bb.3242:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3243: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3245
+; X86-NEXT:  # %bb.3244: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3245: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3247
+; X86-NEXT:  # %bb.3246:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3247: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_3249
+; X86-NEXT:  # %bb.3248:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3249: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3251
+; X86-NEXT:  # %bb.3250: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_3251: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_3253
+; X86-NEXT:  # %bb.3252:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:  .LBB5_3253: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3255
+; X86-NEXT:  # %bb.3254: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3255: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_3257
+; X86-NEXT:  # %bb.3256:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3257: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3259
+; X86-NEXT:  # %bb.3258: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3259: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_3261
+; X86-NEXT:  # %bb.3260: # %udiv-bb1
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_3261: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    negl %edx
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_3263
+; X86-NEXT:  # %bb.3262: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3263: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_3265
+; X86-NEXT:  # %bb.3264:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3265: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3267
+; X86-NEXT:  # %bb.3266: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3267: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3269
+; X86-NEXT:  # %bb.3268: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:  .LBB5_3269: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3271
+; X86-NEXT:  # %bb.3270: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB5_3271: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB5_3273
+; X86-NEXT:  # %bb.3272: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3273: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3275
+; X86-NEXT:  # %bb.3274: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3275: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3277
+; X86-NEXT:  # %bb.3276: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3277: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3279
+; X86-NEXT:  # %bb.3278: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3279: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3281
+; X86-NEXT:  # %bb.3280: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3281: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    jne .LBB5_3283
+; X86-NEXT:  # %bb.3282: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_3283: # %udiv-bb1
+; X86-NEXT:    movl $104, %ecx
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3285
+; X86-NEXT:  # %bb.3284: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3285: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jne .LBB5_3287
+; X86-NEXT:  # %bb.3286: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3287: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3289
+; X86-NEXT:  # %bb.3288:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3289: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3291
+; X86-NEXT:  # %bb.3290:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3291: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3292
+; X86-NEXT:  # %bb.3293: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_3294
+; X86-NEXT:  .LBB5_3295: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3296
+; X86-NEXT:  .LBB5_3297: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_3298
+; X86-NEXT:  .LBB5_3292: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3295
+; X86-NEXT:  .LBB5_3294: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3297
+; X86-NEXT:  .LBB5_3296:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3298: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3300
+; X86-NEXT:  # %bb.3299: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_3300: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3302
+; X86-NEXT:  # %bb.3301:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3302: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_3303
+; X86-NEXT:  # %bb.3304: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB5_3305
+; X86-NEXT:  .LBB5_3303:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3305: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_3307
+; X86-NEXT:  # %bb.3306: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_3307: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3309
+; X86-NEXT:  # %bb.3308: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3309: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3311
+; X86-NEXT:  # %bb.3310: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3311: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3313
+; X86-NEXT:  # %bb.3312:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3313: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    je .LBB5_3315
+; X86-NEXT:  # %bb.3314: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_3315: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3317
+; X86-NEXT:  # %bb.3316: # %udiv-bb1
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_3317: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3319
+; X86-NEXT:  # %bb.3318: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3319: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3321
+; X86-NEXT:  # %bb.3320: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_3321: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3323
+; X86-NEXT:  # %bb.3322: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_3323: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_3325
+; X86-NEXT:  # %bb.3324: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_3325: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3327
+; X86-NEXT:  # %bb.3326: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3327: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3329
+; X86-NEXT:  # %bb.3328: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3329: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    je .LBB5_3330
+; X86-NEXT:  # %bb.3331: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3332
+; X86-NEXT:  .LBB5_3333: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3334
+; X86-NEXT:  .LBB5_3335: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_3337
+; X86-NEXT:  .LBB5_3336: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3337: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3339
+; X86-NEXT:  # %bb.3338: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3339: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3341
+; X86-NEXT:  # %bb.3340: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3341: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_3343
+; X86-NEXT:  # %bb.3342: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3343: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3345
+; X86-NEXT:  # %bb.3344: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_3345: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3347
+; X86-NEXT:  # %bb.3346: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3347: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3349
+; X86-NEXT:  # %bb.3348: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3349: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3351
+; X86-NEXT:  # %bb.3350: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3351: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3353
+; X86-NEXT:  # %bb.3352: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3353: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3355
+; X86-NEXT:  # %bb.3354: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3355: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3357
+; X86-NEXT:  # %bb.3356: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3357: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3359
+; X86-NEXT:  # %bb.3358: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3359: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3361
+; X86-NEXT:  # %bb.3360: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3361: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    jne .LBB5_3363
+; X86-NEXT:  # %bb.3362: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_3363: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3365
+; X86-NEXT:  # %bb.3364: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3365: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3367
+; X86-NEXT:  # %bb.3366: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3367: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3369
+; X86-NEXT:  # %bb.3368:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3369: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3371
+; X86-NEXT:  # %bb.3370:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3371: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3373
+; X86-NEXT:  # %bb.3372: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3373: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3375
+; X86-NEXT:  # %bb.3374: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3375: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_3377
+; X86-NEXT:  # %bb.3376:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3377: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    jae .LBB5_3379
+; X86-NEXT:  # %bb.3378:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3379: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3381
+; X86-NEXT:  # %bb.3380: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_3381: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3383
+; X86-NEXT:  # %bb.3382: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_3383: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3385
+; X86-NEXT:  # %bb.3384:
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_3385: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_3387
+; X86-NEXT:  # %bb.3386:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3387: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB5_3389
+; X86-NEXT:  # %bb.3388:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3389: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3391
+; X86-NEXT:  # %bb.3390: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3391: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3393
+; X86-NEXT:  # %bb.3392: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3393: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3395
+; X86-NEXT:  # %bb.3394: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_3395: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3397
+; X86-NEXT:  # %bb.3396:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3397: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3399
+; X86-NEXT:  # %bb.3398:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3399: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3401
+; X86-NEXT:  # %bb.3400: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3401: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3402
+; X86-NEXT:  # %bb.3403: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3404
+; X86-NEXT:  .LBB5_3405: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3407
+; X86-NEXT:  .LBB5_3406: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3407: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    je .LBB5_3409
+; X86-NEXT:  # %bb.3408: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3409: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    je .LBB5_3411
+; X86-NEXT:  # %bb.3410: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3411: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %edi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jae .LBB5_3412
+; X86-NEXT:  # %bb.3413: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_3414
+; X86-NEXT:  .LBB5_3415: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_3417
+; X86-NEXT:  .LBB5_3416:
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3417: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3419
+; X86-NEXT:  # %bb.3418: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3419: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    je .LBB5_3420
+; X86-NEXT:  # %bb.3421: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3422
+; X86-NEXT:  .LBB5_3423: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3425
+; X86-NEXT:  .LBB5_3424: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3425: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_3427
+; X86-NEXT:  # %bb.3426: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3427: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    je .LBB5_3428
+; X86-NEXT:  # %bb.3429: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3430
+; X86-NEXT:  .LBB5_3431: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3433
+; X86-NEXT:  .LBB5_3432: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3433: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3435
+; X86-NEXT:  # %bb.3434: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3435: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_3437
+; X86-NEXT:  # %bb.3436: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB5_3437: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3439
+; X86-NEXT:  # %bb.3438: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_3439: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3441
+; X86-NEXT:  # %bb.3440: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3441: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3443
+; X86-NEXT:  # %bb.3442: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3443: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3445
+; X86-NEXT:  # %bb.3444: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_3445: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3447
+; X86-NEXT:  # %bb.3446: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_3447: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3449
+; X86-NEXT:  # %bb.3448: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_3449: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3451
+; X86-NEXT:  # %bb.3450: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3451: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3453
+; X86-NEXT:  # %bb.3452: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3453: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3455
+; X86-NEXT:  # %bb.3454: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_3455: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3457
+; X86-NEXT:  # %bb.3456: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_3457: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3459
+; X86-NEXT:  # %bb.3458: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3459: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3461
+; X86-NEXT:  # %bb.3460: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3461: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3463
+; X86-NEXT:  # %bb.3462:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3463: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3465
+; X86-NEXT:  # %bb.3464:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3465: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_3467
+; X86-NEXT:  # %bb.3466:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3467: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3469
+; X86-NEXT:  # %bb.3468:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3469: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3471
+; X86-NEXT:  # %bb.3470: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3471: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3473
+; X86-NEXT:  # %bb.3472:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3473: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3474
+; X86-NEXT:  # %bb.3475: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_3476
+; X86-NEXT:  .LBB5_3477: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3478
+; X86-NEXT:  .LBB5_3479: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    jmp .LBB5_3480
+; X86-NEXT:  .LBB5_3330: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3333
+; X86-NEXT:  .LBB5_3332: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3335
+; X86-NEXT:  .LBB5_3334: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_3336
+; X86-NEXT:    jmp .LBB5_3337
+; X86-NEXT:  .LBB5_3402: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3405
+; X86-NEXT:  .LBB5_3404: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3406
+; X86-NEXT:    jmp .LBB5_3407
+; X86-NEXT:  .LBB5_3412: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_3415
+; X86-NEXT:  .LBB5_3414: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_3416
+; X86-NEXT:    jmp .LBB5_3417
+; X86-NEXT:  .LBB5_3420: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3423
+; X86-NEXT:  .LBB5_3422: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3424
+; X86-NEXT:    jmp .LBB5_3425
+; X86-NEXT:  .LBB5_3428: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3431
+; X86-NEXT:  .LBB5_3430: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3432
+; X86-NEXT:    jmp .LBB5_3433
+; X86-NEXT:  .LBB5_3474: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3477
+; X86-NEXT:  .LBB5_3476: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3479
+; X86-NEXT:  .LBB5_3478:
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3480: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3482
+; X86-NEXT:  # %bb.3481: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3482: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3484
+; X86-NEXT:  # %bb.3483:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3484: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je .LBB5_3486
+; X86-NEXT:  # %bb.3485: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3486: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3488
+; X86-NEXT:  # %bb.3487: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3488: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3490
+; X86-NEXT:  # %bb.3489:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3490: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3492
+; X86-NEXT:  # %bb.3491:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3492: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    je .LBB5_3494
+; X86-NEXT:  # %bb.3493: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3494: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_3496
+; X86-NEXT:  # %bb.3495: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3496: # %udiv-bb1
+; X86-NEXT:    movl $896, %ebx # imm = 0x380
+; X86-NEXT:    subl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    jne .LBB5_3498
+; X86-NEXT:  # %bb.3497: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_3498: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shrdl %cl, %edi, %ebp
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    jne .LBB5_3500
+; X86-NEXT:  # %bb.3499: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB5_3500: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_3502
+; X86-NEXT:  # %bb.3501: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3502: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jae .LBB5_3504
+; X86-NEXT:  # %bb.3503:
+; X86-NEXT:    orl %ebp, %edi
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB5_3504: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3506
+; X86-NEXT:  # %bb.3505: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_3506: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3508
+; X86-NEXT:  # %bb.3507: # %udiv-bb1
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_3508: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3509
+; X86-NEXT:  # %bb.3510: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_3511
+; X86-NEXT:  .LBB5_3512: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3514
+; X86-NEXT:  .LBB5_3513: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3514: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3516
+; X86-NEXT:  # %bb.3515: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3516: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3517
+; X86-NEXT:  # %bb.3518: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3519
+; X86-NEXT:  .LBB5_3520: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_3522
+; X86-NEXT:  .LBB5_3521: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3522: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3524
+; X86-NEXT:  # %bb.3523: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_3524: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3526
+; X86-NEXT:  # %bb.3525:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_3526: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3528
+; X86-NEXT:  # %bb.3527: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3528: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3530
+; X86-NEXT:  # %bb.3529: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3530: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3532
+; X86-NEXT:  # %bb.3531: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3532: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3534
+; X86-NEXT:  # %bb.3533: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3534: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_3536
+; X86-NEXT:  # %bb.3535: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3536: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3538
+; X86-NEXT:  # %bb.3537: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3538: # %udiv-bb1
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_3540
+; X86-NEXT:  # %bb.3539: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3540: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_3542
+; X86-NEXT:  # %bb.3541: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3542: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_3544
+; X86-NEXT:  # %bb.3543: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_3544: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_3546
+; X86-NEXT:  # %bb.3545: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_3546: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3548
+; X86-NEXT:  # %bb.3547: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3548: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3550
+; X86-NEXT:  # %bb.3549: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3550: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3552
+; X86-NEXT:  # %bb.3551: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3552: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3554
+; X86-NEXT:  # %bb.3553:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3554: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3556
+; X86-NEXT:  # %bb.3555: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3556: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3558
+; X86-NEXT:  # %bb.3557: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3558: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3560
+; X86-NEXT:  # %bb.3559:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3560: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3562
+; X86-NEXT:  # %bb.3561: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3562: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3564
+; X86-NEXT:  # %bb.3563: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3564: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3566
+; X86-NEXT:  # %bb.3565: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3566: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_3568
+; X86-NEXT:  # %bb.3567: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_3568: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3570
+; X86-NEXT:  # %bb.3569: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3570: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_3572
+; X86-NEXT:  # %bb.3571: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_3572: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_3574
+; X86-NEXT:  # %bb.3573: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_3574: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3576
+; X86-NEXT:  # %bb.3575: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3576: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_3578
+; X86-NEXT:  # %bb.3577: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_3578: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_3580
+; X86-NEXT:  # %bb.3579: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3580: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3582
+; X86-NEXT:  # %bb.3581: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3582: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3584
+; X86-NEXT:  # %bb.3583: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3584: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3586
+; X86-NEXT:  # %bb.3585:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3586: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_3588
+; X86-NEXT:  # %bb.3587:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3588: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_3590
+; X86-NEXT:  # %bb.3589:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3590: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3592
+; X86-NEXT:  # %bb.3591: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3592: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3594
+; X86-NEXT:  # %bb.3593: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3594: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3596
+; X86-NEXT:  # %bb.3595:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3596: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3598
+; X86-NEXT:  # %bb.3597: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3598: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3600
+; X86-NEXT:  # %bb.3599:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3600: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3602
+; X86-NEXT:  # %bb.3601: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3602: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3604
+; X86-NEXT:  # %bb.3603: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3604: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3606
+; X86-NEXT:  # %bb.3605: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3606: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3608
+; X86-NEXT:  # %bb.3607: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3608: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_3610
+; X86-NEXT:  # %bb.3609: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3610: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3612
+; X86-NEXT:  # %bb.3611: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3612: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3614
+; X86-NEXT:  # %bb.3613: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3614: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3616
+; X86-NEXT:  # %bb.3615: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3616: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3618
+; X86-NEXT:  # %bb.3617: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3618: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    je .LBB5_3620
+; X86-NEXT:  # %bb.3619: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3620: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3622
+; X86-NEXT:  # %bb.3621: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3622: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_3624
+; X86-NEXT:  # %bb.3623: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3624: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3626
+; X86-NEXT:  # %bb.3625: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3626: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3628
+; X86-NEXT:  # %bb.3627: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3628: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3629
+; X86-NEXT:  # %bb.3630: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3631
+; X86-NEXT:  .LBB5_3632: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3633
+; X86-NEXT:  .LBB5_3634: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3636
+; X86-NEXT:  .LBB5_3635: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3636: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    jne .LBB5_3638
+; X86-NEXT:  # %bb.3637: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_3638: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3640
+; X86-NEXT:  # %bb.3639: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3640: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3642
+; X86-NEXT:  # %bb.3641: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3642: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3644
+; X86-NEXT:  # %bb.3643: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3644: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3646
+; X86-NEXT:  # %bb.3645: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3646: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_3648
+; X86-NEXT:  # %bb.3647:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3648: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_3650
+; X86-NEXT:  # %bb.3649:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3650: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3652
+; X86-NEXT:  # %bb.3651:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3652: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3654
+; X86-NEXT:  # %bb.3653:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3654: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3656
+; X86-NEXT:  # %bb.3655: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3656: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3658
+; X86-NEXT:  # %bb.3657: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3658: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3660
+; X86-NEXT:  # %bb.3659: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3660: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3662
+; X86-NEXT:  # %bb.3661: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3662: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3664
+; X86-NEXT:  # %bb.3663:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3664: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3666
+; X86-NEXT:  # %bb.3665: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_3666: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3668
+; X86-NEXT:  # %bb.3667: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3668: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3670
+; X86-NEXT:  # %bb.3669:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3670: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    jne .LBB5_3672
+; X86-NEXT:  # %bb.3671: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_3672: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3674
+; X86-NEXT:  # %bb.3673:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3674: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_3676
+; X86-NEXT:  # %bb.3675: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3676: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3678
+; X86-NEXT:  # %bb.3677:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3678: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3680
+; X86-NEXT:  # %bb.3679: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_3680: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3682
+; X86-NEXT:  # %bb.3681:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3682: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3684
+; X86-NEXT:  # %bb.3683: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_3684: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3686
+; X86-NEXT:  # %bb.3685:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3686: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_3688
+; X86-NEXT:  # %bb.3687:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3688: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3690
+; X86-NEXT:  # %bb.3689: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_3690: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3692
+; X86-NEXT:  # %bb.3691: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3692: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3694
+; X86-NEXT:  # %bb.3693: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3694: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_3696
+; X86-NEXT:  # %bb.3695: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3696: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3697
+; X86-NEXT:  # %bb.3698: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    jne .LBB5_3699
+; X86-NEXT:  .LBB5_3700: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_3702
+; X86-NEXT:  .LBB5_3701: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3702: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3704
+; X86-NEXT:  # %bb.3703: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3704: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3706
+; X86-NEXT:  # %bb.3705: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3706: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_3708
+; X86-NEXT:  # %bb.3707: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3708: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %ebx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3710
+; X86-NEXT:  # %bb.3709: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_3710: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3712
+; X86-NEXT:  # %bb.3711:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3712: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    je .LBB5_3714
+; X86-NEXT:  # %bb.3713: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3714: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_3716
+; X86-NEXT:  # %bb.3715: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3716: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3718
+; X86-NEXT:  # %bb.3717: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3718: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_3720
+; X86-NEXT:  # %bb.3719: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3720: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3722
+; X86-NEXT:  # %bb.3721: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3722: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3724
+; X86-NEXT:  # %bb.3723: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_3724: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3726
+; X86-NEXT:  # %bb.3725: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_3726: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_3728
+; X86-NEXT:  # %bb.3727:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:  .LBB5_3728: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB5_3729
+; X86-NEXT:  # %bb.3730: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3731
+; X86-NEXT:  .LBB5_3732: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_3734
+; X86-NEXT:  .LBB5_3733:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3734: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jb .LBB5_3736
+; X86-NEXT:  # %bb.3735: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3736: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3738
+; X86-NEXT:  # %bb.3737: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3738: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_3740
+; X86-NEXT:  # %bb.3739:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:  .LBB5_3740: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3742
+; X86-NEXT:  # %bb.3741:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3742: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3744
+; X86-NEXT:  # %bb.3743: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_3744: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3746
+; X86-NEXT:  # %bb.3745:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_3746: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3748
+; X86-NEXT:  # %bb.3747: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3748: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    je .LBB5_3750
+; X86-NEXT:  # %bb.3749: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_3750: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3752
+; X86-NEXT:  # %bb.3751: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3752: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3754
+; X86-NEXT:  # %bb.3753: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3754: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %edx # imm = 0x100
+; X86-NEXT:    jae .LBB5_3756
+; X86-NEXT:  # %bb.3755:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:  .LBB5_3756: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3758
+; X86-NEXT:  # %bb.3757: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_3758: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3760
+; X86-NEXT:  # %bb.3759:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3760: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3762
+; X86-NEXT:  # %bb.3761: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3762: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3764
+; X86-NEXT:  # %bb.3763:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3764: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_3766
+; X86-NEXT:  # %bb.3765:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3766: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3768
+; X86-NEXT:  # %bb.3767: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_3768: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3770
+; X86-NEXT:  # %bb.3769:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3770: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_3772
+; X86-NEXT:  # %bb.3771: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_3772: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    je .LBB5_3774
+; X86-NEXT:  # %bb.3773: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3774: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3776
+; X86-NEXT:  # %bb.3775: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_3776: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3778
+; X86-NEXT:  # %bb.3777: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3778: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    je .LBB5_3780
+; X86-NEXT:  # %bb.3779: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3780: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3782
+; X86-NEXT:  # %bb.3781: # %udiv-bb1
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB5_3782: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3783
+; X86-NEXT:  # %bb.3784: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3785
+; X86-NEXT:  .LBB5_3786: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3787
+; X86-NEXT:  .LBB5_3788: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3790
+; X86-NEXT:  .LBB5_3789:
+; X86-NEXT:    orl %ebp, %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB5_3790: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3792
+; X86-NEXT:  # %bb.3791: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_3792: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3794
+; X86-NEXT:  # %bb.3793: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_3794: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_3795
+; X86-NEXT:  # %bb.3796: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_3797
+; X86-NEXT:  .LBB5_3509:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3512
+; X86-NEXT:  .LBB5_3511: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3513
+; X86-NEXT:    jmp .LBB5_3514
+; X86-NEXT:  .LBB5_3517: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3520
+; X86-NEXT:  .LBB5_3519: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_3521
+; X86-NEXT:    jmp .LBB5_3522
+; X86-NEXT:  .LBB5_3629: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3632
+; X86-NEXT:  .LBB5_3631: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3634
+; X86-NEXT:  .LBB5_3633: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3635
+; X86-NEXT:    jmp .LBB5_3636
+; X86-NEXT:  .LBB5_3697: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    je .LBB5_3700
+; X86-NEXT:  .LBB5_3699: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_3701
+; X86-NEXT:    jmp .LBB5_3702
+; X86-NEXT:  .LBB5_3729: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3732
+; X86-NEXT:  .LBB5_3731: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_3733
+; X86-NEXT:    jmp .LBB5_3734
+; X86-NEXT:  .LBB5_3783: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3786
+; X86-NEXT:  .LBB5_3785: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3788
+; X86-NEXT:  .LBB5_3787:
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3789
+; X86-NEXT:    jmp .LBB5_3790
+; X86-NEXT:  .LBB5_3795:
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3797: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_3799
+; X86-NEXT:  # %bb.3798: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_3799: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3801
+; X86-NEXT:  # %bb.3800: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_3801: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_3803
+; X86-NEXT:  # %bb.3802: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3803: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3805
+; X86-NEXT:  # %bb.3804: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_3805: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3807
+; X86-NEXT:  # %bb.3806: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_3807: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jne .LBB5_3809
+; X86-NEXT:  # %bb.3808: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3809: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3811
+; X86-NEXT:  # %bb.3810: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3811: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB5_3813
+; X86-NEXT:  # %bb.3812: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3813: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3815
+; X86-NEXT:  # %bb.3814: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3815: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3817
+; X86-NEXT:  # %bb.3816:
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3817: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3819
+; X86-NEXT:  # %bb.3818: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3819: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3821
+; X86-NEXT:  # %bb.3820:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3821: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3823
+; X86-NEXT:  # %bb.3822: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_3823: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3825
+; X86-NEXT:  # %bb.3824:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3825: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3827
+; X86-NEXT:  # %bb.3826: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3827: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3829
+; X86-NEXT:  # %bb.3828: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3829: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3831
+; X86-NEXT:  # %bb.3830: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3831: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_3833
+; X86-NEXT:  # %bb.3832: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3833: # %udiv-bb1
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    je .LBB5_3835
+; X86-NEXT:  # %bb.3834: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3835: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3837
+; X86-NEXT:  # %bb.3836: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_3837: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3838
+; X86-NEXT:  # %bb.3839: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3840
+; X86-NEXT:  .LBB5_3841: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3842
+; X86-NEXT:  .LBB5_3843: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3844
+; X86-NEXT:  .LBB5_3845: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3846
+; X86-NEXT:  .LBB5_3847: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3849
+; X86-NEXT:  .LBB5_3848:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3849: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3851
+; X86-NEXT:  # %bb.3850: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_3851: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    je .LBB5_3853
+; X86-NEXT:  # %bb.3852: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3853: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %edx # imm = 0x100
+; X86-NEXT:    jae .LBB5_3855
+; X86-NEXT:  # %bb.3854:
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_3855: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    je .LBB5_3856
+; X86-NEXT:  # %bb.3857: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_3858
+; X86-NEXT:  .LBB5_3859: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3860
+; X86-NEXT:  .LBB5_3861: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3862
+; X86-NEXT:  .LBB5_3863: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3865
+; X86-NEXT:  .LBB5_3864: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3865: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3867
+; X86-NEXT:  # %bb.3866: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3867: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3869
+; X86-NEXT:  # %bb.3868: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3869: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3871
+; X86-NEXT:  # %bb.3870: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3871: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3873
+; X86-NEXT:  # %bb.3872: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3873: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3875
+; X86-NEXT:  # %bb.3874: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3875: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3877
+; X86-NEXT:  # %bb.3876: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3877: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3879
+; X86-NEXT:  # %bb.3878:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3879: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3881
+; X86-NEXT:  # %bb.3880:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3881: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    je .LBB5_3883
+; X86-NEXT:  # %bb.3882: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3883: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_3885
+; X86-NEXT:  # %bb.3884:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:  .LBB5_3885: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3887
+; X86-NEXT:  # %bb.3886: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_3887: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3889
+; X86-NEXT:  # %bb.3888:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3889: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3891
+; X86-NEXT:  # %bb.3890: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_3891: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3893
+; X86-NEXT:  # %bb.3892:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3893: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je .LBB5_3895
+; X86-NEXT:  # %bb.3894: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3895: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3897
+; X86-NEXT:  # %bb.3896: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_3897: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3899
+; X86-NEXT:  # %bb.3898: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3899: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %edx # imm = 0x100
+; X86-NEXT:    jae .LBB5_3901
+; X86-NEXT:  # %bb.3900:
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_3901: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_3903
+; X86-NEXT:  # %bb.3902: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3903: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3905
+; X86-NEXT:  # %bb.3904: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3905: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3907
+; X86-NEXT:  # %bb.3906: # %udiv-bb1
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_3907: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_3909
+; X86-NEXT:  # %bb.3908:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_3909: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_3911
+; X86-NEXT:  # %bb.3910: # %udiv-bb1
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_3911: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_3913
+; X86-NEXT:  # %bb.3912: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_3913: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3915
+; X86-NEXT:  # %bb.3914: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_3915: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_3917
+; X86-NEXT:  # %bb.3916: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_3917: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    je .LBB5_3918
+; X86-NEXT:  # %bb.3919: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_3920
+; X86-NEXT:  .LBB5_3921: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_3923
+; X86-NEXT:  .LBB5_3922: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_3923: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3925
+; X86-NEXT:  # %bb.3924: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3925: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3927
+; X86-NEXT:  # %bb.3926: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3927: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB5_3929
+; X86-NEXT:  # %bb.3928: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3929: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3931
+; X86-NEXT:  # %bb.3930: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_3931: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_3932
+; X86-NEXT:  # %bb.3933: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_3934
+; X86-NEXT:  .LBB5_3838: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3841
+; X86-NEXT:  .LBB5_3840: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3843
+; X86-NEXT:  .LBB5_3842: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3845
+; X86-NEXT:  .LBB5_3844: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3847
+; X86-NEXT:  .LBB5_3846:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3848
+; X86-NEXT:    jmp .LBB5_3849
+; X86-NEXT:  .LBB5_3856: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_3859
+; X86-NEXT:  .LBB5_3858: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3861
+; X86-NEXT:  .LBB5_3860: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3863
+; X86-NEXT:  .LBB5_3862: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3864
+; X86-NEXT:    jmp .LBB5_3865
+; X86-NEXT:  .LBB5_3918: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_3921
+; X86-NEXT:  .LBB5_3920: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_3922
+; X86-NEXT:    jmp .LBB5_3923
+; X86-NEXT:  .LBB5_3932:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3934: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_3936
+; X86-NEXT:  # %bb.3935: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_3936: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_3938
+; X86-NEXT:  # %bb.3937:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_3938: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3940
+; X86-NEXT:  # %bb.3939:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:  .LBB5_3940: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    jne .LBB5_3941
+; X86-NEXT:  # %bb.3942: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_3943
+; X86-NEXT:  .LBB5_3944: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3945
+; X86-NEXT:  .LBB5_3946: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_3948
+; X86-NEXT:  .LBB5_3947:
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_3948: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_3950
+; X86-NEXT:  # %bb.3949: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_3950: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3952
+; X86-NEXT:  # %bb.3951: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3952: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_3954
+; X86-NEXT:  # %bb.3953: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_3954: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jne .LBB5_3956
+; X86-NEXT:  # %bb.3955: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:  .LBB5_3956: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_3958
+; X86-NEXT:  # %bb.3957: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_3958: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_3960
+; X86-NEXT:  # %bb.3959: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3960: # %udiv-bb1
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_3962
+; X86-NEXT:  # %bb.3961: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_3962: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:    jb .LBB5_3964
+; X86-NEXT:  # %bb.3963: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3964: # %udiv-bb1
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_3965
+; X86-NEXT:  # %bb.3966: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_3967
+; X86-NEXT:  .LBB5_3968: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3970
+; X86-NEXT:  .LBB5_3969: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3970: # %udiv-bb1
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_3972
+; X86-NEXT:  # %bb.3971: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_3972: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3974
+; X86-NEXT:  # %bb.3973: # %udiv-bb1
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_3974: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3976
+; X86-NEXT:  # %bb.3975: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_3976: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3978
+; X86-NEXT:  # %bb.3977: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_3978: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3980
+; X86-NEXT:  # %bb.3979: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3980: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_3982
+; X86-NEXT:  # %bb.3981: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_3982: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_3984
+; X86-NEXT:  # %bb.3983:
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_3984: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3985
+; X86-NEXT:  # %bb.3986: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_3987
+; X86-NEXT:  .LBB5_3941: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_3944
+; X86-NEXT:  .LBB5_3943: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3946
+; X86-NEXT:  .LBB5_3945: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_3947
+; X86-NEXT:    jmp .LBB5_3948
+; X86-NEXT:  .LBB5_3965: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_3968
+; X86-NEXT:  .LBB5_3967: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_3969
+; X86-NEXT:    jmp .LBB5_3970
+; X86-NEXT:  .LBB5_3985:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3987: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    je .LBB5_3989
+; X86-NEXT:  # %bb.3988: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_3989: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_3991
+; X86-NEXT:  # %bb.3990: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_3991: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_3993
+; X86-NEXT:  # %bb.3992: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3993: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3995
+; X86-NEXT:  # %bb.3994: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3995: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3997
+; X86-NEXT:  # %bb.3996: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3997: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_3999
+; X86-NEXT:  # %bb.3998: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_3999: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4001
+; X86-NEXT:  # %bb.4000:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4001: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_4003
+; X86-NEXT:  # %bb.4002:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4003: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    je .LBB5_4005
+; X86-NEXT:  # %bb.4004: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4005: # %udiv-bb1
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    je .LBB5_4007
+; X86-NEXT:  # %bb.4006: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4007: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    je .LBB5_4009
+; X86-NEXT:  # %bb.4008: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4009: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4011
+; X86-NEXT:  # %bb.4010:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_4011: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    je .LBB5_4013
+; X86-NEXT:  # %bb.4012: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_4013: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %esi # imm = 0x100
+; X86-NEXT:    jb .LBB5_4015
+; X86-NEXT:  # %bb.4014: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4015: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4017
+; X86-NEXT:  # %bb.4016:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4017: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_4019
+; X86-NEXT:  # %bb.4018: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_4019: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_4021
+; X86-NEXT:  # %bb.4020: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4021: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_4023
+; X86-NEXT:  # %bb.4022: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4023: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4025
+; X86-NEXT:  # %bb.4024: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4025: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4027
+; X86-NEXT:  # %bb.4026: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4027: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4029
+; X86-NEXT:  # %bb.4028: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4029: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    je .LBB5_4030
+; X86-NEXT:  # %bb.4031: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_4032
+; X86-NEXT:  .LBB5_4033: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4035
+; X86-NEXT:  .LBB5_4034: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_4035: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_4037
+; X86-NEXT:  # %bb.4036: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4037: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_4039
+; X86-NEXT:  # %bb.4038: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_4039: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4041
+; X86-NEXT:  # %bb.4040: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4041: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4043
+; X86-NEXT:  # %bb.4042: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4043: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_4045
+; X86-NEXT:  # %bb.4044: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4045: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_4047
+; X86-NEXT:  # %bb.4046: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4047: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_4049
+; X86-NEXT:  # %bb.4048: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4049: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_4051
+; X86-NEXT:  # %bb.4050: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4051: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    je .LBB5_4052
+; X86-NEXT:  # %bb.4053: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4054
+; X86-NEXT:  .LBB5_4055: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4056
+; X86-NEXT:  .LBB5_4057: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_4058
+; X86-NEXT:  .LBB5_4059: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4060
+; X86-NEXT:  .LBB5_4061: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_4062
+; X86-NEXT:  .LBB5_4063: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4065
+; X86-NEXT:  .LBB5_4064: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_4065: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_4067
+; X86-NEXT:  # %bb.4066: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4067: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4069
+; X86-NEXT:  # %bb.4068: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4069: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4071
+; X86-NEXT:  # %bb.4070: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4071: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4073
+; X86-NEXT:  # %bb.4072: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4073: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4075
+; X86-NEXT:  # %bb.4074: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4075: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4077
+; X86-NEXT:  # %bb.4076: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4077: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4079
+; X86-NEXT:  # %bb.4078: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4079: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4081
+; X86-NEXT:  # %bb.4080: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4081: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4083
+; X86-NEXT:  # %bb.4082: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4083: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4085
+; X86-NEXT:  # %bb.4084: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4085: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4087
+; X86-NEXT:  # %bb.4086: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4087: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4089
+; X86-NEXT:  # %bb.4088: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4089: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4091
+; X86-NEXT:  # %bb.4090: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4091: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4093
+; X86-NEXT:  # %bb.4092: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4093: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4095
+; X86-NEXT:  # %bb.4094: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4095: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4097
+; X86-NEXT:  # %bb.4096: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4097: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4099
+; X86-NEXT:  # %bb.4098: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4099: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4101
+; X86-NEXT:  # %bb.4100:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4101: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_4103
+; X86-NEXT:  # %bb.4102:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4103: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4105
+; X86-NEXT:  # %bb.4104:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4105: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4107
+; X86-NEXT:  # %bb.4106:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4107: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_4108
+; X86-NEXT:  # %bb.4109: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4110
+; X86-NEXT:  .LBB5_4111: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %esi
+; X86-NEXT:    jb .LBB5_4112
+; X86-NEXT:  .LBB5_4113: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4115
+; X86-NEXT:  .LBB5_4114:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4115: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4117
+; X86-NEXT:  # %bb.4116: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_4117: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4119
+; X86-NEXT:  # %bb.4118: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4119: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4121
+; X86-NEXT:  # %bb.4120: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4121: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4123
+; X86-NEXT:  # %bb.4122: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4123: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4125
+; X86-NEXT:  # %bb.4124: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4125: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4127
+; X86-NEXT:  # %bb.4126: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4127: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4128
+; X86-NEXT:  # %bb.4129: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_4130
+; X86-NEXT:  .LBB5_4131: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4132
+; X86-NEXT:  .LBB5_4133: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4135
+; X86-NEXT:  .LBB5_4134:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_4135: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4137
+; X86-NEXT:  # %bb.4136:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_4137: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4139
+; X86-NEXT:  # %bb.4138:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_4139: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4141
+; X86-NEXT:  # %bb.4140: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4141: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_4143
+; X86-NEXT:  # %bb.4142: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4143: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4145
+; X86-NEXT:  # %bb.4144: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_4145: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    je .LBB5_4147
+; X86-NEXT:  # %bb.4146: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4147: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4149
+; X86-NEXT:  # %bb.4148:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4149: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_4151
+; X86-NEXT:  # %bb.4150: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4151: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4153
+; X86-NEXT:  # %bb.4152: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4153: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4155
+; X86-NEXT:  # %bb.4154: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_4155: # %udiv-bb1
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    je .LBB5_4156
+; X86-NEXT:  # %bb.4157: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4158
+; X86-NEXT:  .LBB5_4159: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4161
+; X86-NEXT:  .LBB5_4160: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_4161: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_4163
+; X86-NEXT:  # %bb.4162: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4163: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_4165
+; X86-NEXT:  # %bb.4164: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4165: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4167
+; X86-NEXT:  # %bb.4166: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4167: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jne .LBB5_4169
+; X86-NEXT:  # %bb.4168: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4169: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4171
+; X86-NEXT:  # %bb.4170:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4171: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4173
+; X86-NEXT:  # %bb.4172: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4173: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4175
+; X86-NEXT:  # %bb.4174: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4175: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4177
+; X86-NEXT:  # %bb.4176:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4177: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4179
+; X86-NEXT:  # %bb.4178:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4179: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_4181
+; X86-NEXT:  # %bb.4180: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4181: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_4183
+; X86-NEXT:  # %bb.4182: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4183: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_4185
+; X86-NEXT:  # %bb.4184: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4185: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4187
+; X86-NEXT:  # %bb.4186: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4187: # %udiv-bb1
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4189
+; X86-NEXT:  # %bb.4188: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4189: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4191
+; X86-NEXT:  # %bb.4190: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4191: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4193
+; X86-NEXT:  # %bb.4192: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4193: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4195
+; X86-NEXT:  # %bb.4194: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_4195: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_4197
+; X86-NEXT:  # %bb.4196: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4197: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    je .LBB5_4199
+; X86-NEXT:  # %bb.4198: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4199: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_4201
+; X86-NEXT:  # %bb.4200: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB5_4201: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4203
+; X86-NEXT:  # %bb.4202: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4203: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4205
+; X86-NEXT:  # %bb.4204: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4205: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4207
+; X86-NEXT:  # %bb.4206: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4207: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4209
+; X86-NEXT:  # %bb.4208: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4209: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_4211
+; X86-NEXT:  # %bb.4210: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_4211: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4213
+; X86-NEXT:  # %bb.4212: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4213: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4215
+; X86-NEXT:  # %bb.4214: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4215: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4217
+; X86-NEXT:  # %bb.4216: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4217: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4219
+; X86-NEXT:  # %bb.4218: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4219: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4221
+; X86-NEXT:  # %bb.4220: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4221: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4223
+; X86-NEXT:  # %bb.4222: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4223: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4225
+; X86-NEXT:  # %bb.4224: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4225: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_4227
+; X86-NEXT:  # %bb.4226: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4227: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    jne .LBB5_4229
+; X86-NEXT:  # %bb.4228: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_4229: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4231
+; X86-NEXT:  # %bb.4230:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4231: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4233
+; X86-NEXT:  # %bb.4232:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4233: # %udiv-bb1
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_4235
+; X86-NEXT:  # %bb.4234: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4235: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_4237
+; X86-NEXT:  # %bb.4236: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4237: # %udiv-bb1
+; X86-NEXT:    cmpl $512, %edx # imm = 0x200
+; X86-NEXT:    jb .LBB5_4239
+; X86-NEXT:  # %bb.4238: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4239: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4241
+; X86-NEXT:  # %bb.4240: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4241: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4243
+; X86-NEXT:  # %bb.4242:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4243: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4245
+; X86-NEXT:  # %bb.4244: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4245: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4247
+; X86-NEXT:  # %bb.4246: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4247: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_4249
+; X86-NEXT:  # %bb.4248: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4249: # %udiv-bb1
+; X86-NEXT:    subl $512, %ebx # imm = 0x200
+; X86-NEXT:    jb .LBB5_4251
+; X86-NEXT:  # %bb.4250: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4251: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4253
+; X86-NEXT:  # %bb.4252:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_4253: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4255
+; X86-NEXT:  # %bb.4254: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4255: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4257
+; X86-NEXT:  # %bb.4256: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_4257: # %udiv-bb1
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4258
+; X86-NEXT:  # %bb.4259: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4261
+; X86-NEXT:    jmp .LBB5_4262
+; X86-NEXT:  .LBB5_4030: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_4033
+; X86-NEXT:  .LBB5_4032: # %udiv-bb1
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4034
+; X86-NEXT:    jmp .LBB5_4035
+; X86-NEXT:  .LBB5_4052: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4055
+; X86-NEXT:  .LBB5_4054: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4057
+; X86-NEXT:  .LBB5_4056: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_4059
+; X86-NEXT:  .LBB5_4058: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4061
+; X86-NEXT:  .LBB5_4060: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_4063
+; X86-NEXT:  .LBB5_4062: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4064
+; X86-NEXT:    jmp .LBB5_4065
+; X86-NEXT:  .LBB5_4108:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4111
+; X86-NEXT:  .LBB5_4110:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %esi
+; X86-NEXT:    jae .LBB5_4113
+; X86-NEXT:  .LBB5_4112:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4114
+; X86-NEXT:    jmp .LBB5_4115
+; X86-NEXT:  .LBB5_4128: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4131
+; X86-NEXT:  .LBB5_4130: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4133
+; X86-NEXT:  .LBB5_4132:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4134
+; X86-NEXT:    jmp .LBB5_4135
+; X86-NEXT:  .LBB5_4156: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4159
+; X86-NEXT:  .LBB5_4158: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4160
+; X86-NEXT:    jmp .LBB5_4161
+; X86-NEXT:  .LBB5_4258:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4262
+; X86-NEXT:  .LBB5_4261:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_4262: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4264
+; X86-NEXT:  # %bb.4263: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB5_4264: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_4266
+; X86-NEXT:  # %bb.4265: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4266: # %udiv-bb1
+; X86-NEXT:    cmpl $256, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4268
+; X86-NEXT:  # %bb.4267:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4268: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_4270
+; X86-NEXT:  # %bb.4269: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4270: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4272
+; X86-NEXT:  # %bb.4271: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4272: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4274
+; X86-NEXT:  # %bb.4273: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:  .LBB5_4274: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_4276
+; X86-NEXT:  # %bb.4275: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_4276: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4278
+; X86-NEXT:  # %bb.4277: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4278: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4280
+; X86-NEXT:  # %bb.4279: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4280: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4282
+; X86-NEXT:  # %bb.4281: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_4282: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_4284
+; X86-NEXT:  # %bb.4283: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_4284: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    leal 384(%ebx), %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:    jne .LBB5_4286
+; X86-NEXT:  # %bb.4285: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB5_4286: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4288
+; X86-NEXT:  # %bb.4287: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4288: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4290
+; X86-NEXT:  # %bb.4289: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_4290: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4292
+; X86-NEXT:  # %bb.4291: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4292: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB5_4294
+; X86-NEXT:  # %bb.4293: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4294: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4296
+; X86-NEXT:  # %bb.4295: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4296: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    jne .LBB5_4298
+; X86-NEXT:  # %bb.4297: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_4298: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4300
+; X86-NEXT:  # %bb.4299: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4300: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4302
+; X86-NEXT:  # %bb.4301:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4302: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_4304
+; X86-NEXT:  # %bb.4303: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4304: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_4306
+; X86-NEXT:  # %bb.4305: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4306: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4308
+; X86-NEXT:  # %bb.4307: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4308: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4310
+; X86-NEXT:  # %bb.4309:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4310: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4312
+; X86-NEXT:  # %bb.4311: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4312: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4314
+; X86-NEXT:  # %bb.4313: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4314: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4315
+; X86-NEXT:  # %bb.4316: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4317
+; X86-NEXT:  .LBB5_4318: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4319
+; X86-NEXT:  .LBB5_4320: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_4322
+; X86-NEXT:  .LBB5_4321: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_4322: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4324
+; X86-NEXT:  # %bb.4323: # %udiv-bb1
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_4324: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_4326
+; X86-NEXT:  # %bb.4325: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4326: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_4328
+; X86-NEXT:  # %bb.4327: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4328: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4329
+; X86-NEXT:  # %bb.4330: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4331
+; X86-NEXT:  .LBB5_4332: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4333
+; X86-NEXT:  .LBB5_4334: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_4336
+; X86-NEXT:  .LBB5_4335: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4336: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4337
+; X86-NEXT:  # %bb.4338: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4339
+; X86-NEXT:  .LBB5_4340: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4342
+; X86-NEXT:  .LBB5_4341:
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_4342: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4344
+; X86-NEXT:  # %bb.4343:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4344: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4346
+; X86-NEXT:  # %bb.4345: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_4346: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_4348
+; X86-NEXT:  # %bb.4347: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_4348: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_4350
+; X86-NEXT:  # %bb.4349: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4350: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_4352
+; X86-NEXT:  # %bb.4351: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_4352: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_4354
+; X86-NEXT:  # %bb.4353: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_4354: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4356
+; X86-NEXT:  # %bb.4355: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4356: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_4358
+; X86-NEXT:  # %bb.4357: # %udiv-bb1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_4358: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4360
+; X86-NEXT:  # %bb.4359:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4360: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4362
+; X86-NEXT:  # %bb.4361: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_4362: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4364
+; X86-NEXT:  # %bb.4363:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4364: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4366
+; X86-NEXT:  # %bb.4365:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4366: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4368
+; X86-NEXT:  # %bb.4367: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4368: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4370
+; X86-NEXT:  # %bb.4369: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4370: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4372
+; X86-NEXT:  # %bb.4371: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4372: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4374
+; X86-NEXT:  # %bb.4373: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4374: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_4376
+; X86-NEXT:  # %bb.4375: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4376: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4378
+; X86-NEXT:  # %bb.4377: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4378: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4380
+; X86-NEXT:  # %bb.4379: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_4380: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4382
+; X86-NEXT:  # %bb.4381:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4382: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    jne .LBB5_4384
+; X86-NEXT:  # %bb.4383: # %udiv-bb1
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_4384: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4386
+; X86-NEXT:  # %bb.4385: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4386: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_4388
+; X86-NEXT:  # %bb.4387: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4388: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4390
+; X86-NEXT:  # %bb.4389: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4390: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4392
+; X86-NEXT:  # %bb.4391: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4392: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edi
+; X86-NEXT:    jb .LBB5_4394
+; X86-NEXT:  # %bb.4393: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4394: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4396
+; X86-NEXT:  # %bb.4395: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4396: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4398
+; X86-NEXT:  # %bb.4397: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4398: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4400
+; X86-NEXT:  # %bb.4399: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4400: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4402
+; X86-NEXT:  # %bb.4401: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4402: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4404
+; X86-NEXT:  # %bb.4403:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_4404: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4406
+; X86-NEXT:  # %bb.4405: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4406: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_4407
+; X86-NEXT:  # %bb.4408: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4409
+; X86-NEXT:  .LBB5_4410: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4411
+; X86-NEXT:  .LBB5_4412: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4414
+; X86-NEXT:    jmp .LBB5_4415
+; X86-NEXT:  .LBB5_4315: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4318
+; X86-NEXT:  .LBB5_4317:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4320
+; X86-NEXT:  .LBB5_4319: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_4321
+; X86-NEXT:    jmp .LBB5_4322
+; X86-NEXT:  .LBB5_4329: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4332
+; X86-NEXT:  .LBB5_4331: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4334
+; X86-NEXT:  .LBB5_4333: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_4335
+; X86-NEXT:    jmp .LBB5_4336
+; X86-NEXT:  .LBB5_4337: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4340
+; X86-NEXT:  .LBB5_4339: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4341
+; X86-NEXT:    jmp .LBB5_4342
+; X86-NEXT:  .LBB5_4407:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4410
+; X86-NEXT:  .LBB5_4409:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4412
+; X86-NEXT:  .LBB5_4411:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4415
+; X86-NEXT:  .LBB5_4414: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4415: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4416
+; X86-NEXT:  # %bb.4417: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4418
+; X86-NEXT:  .LBB5_4419: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB5_4420
+; X86-NEXT:  .LBB5_4421: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4423
+; X86-NEXT:  .LBB5_4422: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4423: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4425
+; X86-NEXT:  # %bb.4424: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4425: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4427
+; X86-NEXT:  # %bb.4426: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4427: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4429
+; X86-NEXT:  # %bb.4428: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4429: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_4431
+; X86-NEXT:  # %bb.4430:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4431: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4433
+; X86-NEXT:  # %bb.4432:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4433: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4435
+; X86-NEXT:  # %bb.4434: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4435: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4437
+; X86-NEXT:  # %bb.4436: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4437: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4439
+; X86-NEXT:  # %bb.4438: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4439: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4441
+; X86-NEXT:  # %bb.4440: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4441: # %udiv-bb1
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4443
+; X86-NEXT:  # %bb.4442: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4443: # %udiv-bb1
+; X86-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4445
+; X86-NEXT:  # %bb.4444:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4445: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4447
+; X86-NEXT:  # %bb.4446: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4447: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4449
+; X86-NEXT:  # %bb.4448:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4449: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4451
+; X86-NEXT:  # %bb.4450: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4451: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4453
+; X86-NEXT:  # %bb.4452: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4453: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    jne .LBB5_4454
+; X86-NEXT:  # %bb.4455: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4456
+; X86-NEXT:  .LBB5_4457: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4458
+; X86-NEXT:  .LBB5_4459: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4461
+; X86-NEXT:  .LBB5_4460:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4461: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_4463
+; X86-NEXT:  # %bb.4462: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4463: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4465
+; X86-NEXT:  # %bb.4464:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4465: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4467
+; X86-NEXT:  # %bb.4466: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4467: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4469
+; X86-NEXT:  # %bb.4468:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4469: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4471
+; X86-NEXT:  # %bb.4470:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4471: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    je .LBB5_4473
+; X86-NEXT:  # %bb.4472: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4473: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4475
+; X86-NEXT:  # %bb.4474: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4475: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4477
+; X86-NEXT:  # %bb.4476: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4477: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4479
+; X86-NEXT:  # %bb.4478:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4479: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jb .LBB5_4481
+; X86-NEXT:  # %bb.4480: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4481: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4483
+; X86-NEXT:  # %bb.4482:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4483: # %udiv-bb1
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4485
+; X86-NEXT:  # %bb.4484: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4485: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_4487
+; X86-NEXT:  # %bb.4486: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4487: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_4489
+; X86-NEXT:  # %bb.4488: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4489: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4491
+; X86-NEXT:  # %bb.4490: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4491: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_4493
+; X86-NEXT:  # %bb.4492: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4493: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_4495
+; X86-NEXT:  # %bb.4494:
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_4495: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4497
+; X86-NEXT:  # %bb.4496: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4497: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_4499
+; X86-NEXT:  # %bb.4498: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4499: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jae .LBB5_4501
+; X86-NEXT:  # %bb.4500:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edx, %edi
+; X86-NEXT:  .LBB5_4501: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_4503
+; X86-NEXT:  # %bb.4502: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4503: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_4505
+; X86-NEXT:  # %bb.4504: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4505: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_4507
+; X86-NEXT:  # %bb.4506: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4507: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4509
+; X86-NEXT:  # %bb.4508: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4509: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_4511
+; X86-NEXT:  # %bb.4510: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4511: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4513
+; X86-NEXT:  # %bb.4512: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4513: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4515
+; X86-NEXT:  # %bb.4514: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4515: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4517
+; X86-NEXT:  # %bb.4516: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4517: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4519
+; X86-NEXT:  # %bb.4518:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4519: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4521
+; X86-NEXT:  # %bb.4520: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4521: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4523
+; X86-NEXT:  # %bb.4522:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4523: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_4525
+; X86-NEXT:  # %bb.4524: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4525: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4527
+; X86-NEXT:  # %bb.4526:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4527: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4529
+; X86-NEXT:  # %bb.4528: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4529: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4531
+; X86-NEXT:  # %bb.4530: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4531: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4533
+; X86-NEXT:  # %bb.4532: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4533: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4535
+; X86-NEXT:  # %bb.4534: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4535: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4537
+; X86-NEXT:  # %bb.4536: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4537: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4539
+; X86-NEXT:  # %bb.4538: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4539: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4541
+; X86-NEXT:  # %bb.4540: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4541: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_4543
+; X86-NEXT:  # %bb.4542: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4543: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_4544
+; X86-NEXT:  # %bb.4545: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_4546
+; X86-NEXT:  .LBB5_4547: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edi
+; X86-NEXT:    jae .LBB5_4549
+; X86-NEXT:  .LBB5_4548:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4549: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %esi
+; X86-NEXT:    jb .LBB5_4551
+; X86-NEXT:  # %bb.4550: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB5_4551: # %udiv-bb1
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    je .LBB5_4553
+; X86-NEXT:  # %bb.4552: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4553: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jb .LBB5_4555
+; X86-NEXT:  # %bb.4554: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4555: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4557
+; X86-NEXT:  # %bb.4556:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4557: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4559
+; X86-NEXT:  # %bb.4558:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4559: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB5_4561
+; X86-NEXT:  # %bb.4560: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4561: # %udiv-bb1
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    je .LBB5_4563
+; X86-NEXT:  # %bb.4562: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4563: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_4565
+; X86-NEXT:  # %bb.4564: # %udiv-bb1
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_4565: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB5_4567
+; X86-NEXT:  # %bb.4566:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4567: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4569
+; X86-NEXT:  # %bb.4568: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4569: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_4571
+; X86-NEXT:  # %bb.4570: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4571: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jae .LBB5_4573
+; X86-NEXT:  # %bb.4572:
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4573: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $256, %edx # imm = 0x100
+; X86-NEXT:    jb .LBB5_4575
+; X86-NEXT:  # %bb.4574: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4575: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jae .LBB5_4577
+; X86-NEXT:  # %bb.4576:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_4577: # %udiv-bb1
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    je .LBB5_4578
+; X86-NEXT:  # %bb.4579: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4580
+; X86-NEXT:  .LBB5_4581: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4583
+; X86-NEXT:  .LBB5_4582: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4583: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4585
+; X86-NEXT:  # %bb.4584: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4585: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4587
+; X86-NEXT:  # %bb.4586: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4587: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4589
+; X86-NEXT:  # %bb.4588: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4589: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4591
+; X86-NEXT:  # %bb.4590: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4591: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4593
+; X86-NEXT:  # %bb.4592:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4593: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4595
+; X86-NEXT:  # %bb.4594: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_4595: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4597
+; X86-NEXT:  # %bb.4596:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4597: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    jae .LBB5_4599
+; X86-NEXT:  # %bb.4598:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4599: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    je .LBB5_4601
+; X86-NEXT:  # %bb.4600: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4601: # %udiv-bb1
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    jne .LBB5_4602
+; X86-NEXT:  # %bb.4603: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4604
+; X86-NEXT:  .LBB5_4605: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4607
+; X86-NEXT:  .LBB5_4606:
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:  .LBB5_4607: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4609
+; X86-NEXT:  # %bb.4608: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4609: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4611
+; X86-NEXT:  # %bb.4610: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_4611: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %esi # imm = 0x100
+; X86-NEXT:    jae .LBB5_4613
+; X86-NEXT:  # %bb.4612:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4613: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    je .LBB5_4615
+; X86-NEXT:  # %bb.4614: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4615: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    jae .LBB5_4617
+; X86-NEXT:  # %bb.4616:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4617: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4619
+; X86-NEXT:  # %bb.4618: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4619: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_4621
+; X86-NEXT:  # %bb.4620:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4621: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4623
+; X86-NEXT:  # %bb.4622: # %udiv-bb1
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_4623: # %udiv-bb1
+; X86-NEXT:    cmpl $512, %edx # imm = 0x200
+; X86-NEXT:    jae .LBB5_4625
+; X86-NEXT:  # %bb.4624:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4625: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_4627
+; X86-NEXT:  # %bb.4626: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_4627: # %udiv-bb1
+; X86-NEXT:    cmpl $512, %edx # imm = 0x200
+; X86-NEXT:    jae .LBB5_4629
+; X86-NEXT:  # %bb.4628:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4629: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_4631
+; X86-NEXT:  # %bb.4630: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4631: # %udiv-bb1
+; X86-NEXT:    cmpl $512, %edx # imm = 0x200
+; X86-NEXT:    jae .LBB5_4633
+; X86-NEXT:  # %bb.4632:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4633: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_4635
+; X86-NEXT:  # %bb.4634:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:  .LBB5_4635: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_4637
+; X86-NEXT:  # %bb.4636: # %udiv-bb1
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_4637: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4639
+; X86-NEXT:  # %bb.4638:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4639: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_4641
+; X86-NEXT:  # %bb.4640: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4641: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB5_4643
+; X86-NEXT:  # %bb.4642:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4643: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4645
+; X86-NEXT:  # %bb.4644: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4645: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_4647
+; X86-NEXT:  # %bb.4646:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4647: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    je .LBB5_4649
+; X86-NEXT:  # %bb.4648: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4649: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_4651
+; X86-NEXT:  # %bb.4650: # %udiv-bb1
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_4651: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_4653
+; X86-NEXT:  # %bb.4652:
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4653: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jae .LBB5_4655
+; X86-NEXT:  # %bb.4654:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4655: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4657
+; X86-NEXT:  # %bb.4656: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4657: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_4659
+; X86-NEXT:  # %bb.4658: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB5_4659: # %udiv-bb1
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_4661
+; X86-NEXT:  # %bb.4660: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4661: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_4663
+; X86-NEXT:  # %bb.4662:
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_4663: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $512, %edx # imm = 0x200
+; X86-NEXT:    jae .LBB5_4665
+; X86-NEXT:  # %bb.4664:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4665: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    je .LBB5_4667
+; X86-NEXT:  # %bb.4666: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4667: # %udiv-bb1
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    jne .LBB5_4668
+; X86-NEXT:  # %bb.4669: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4670
+; X86-NEXT:  .LBB5_4671: # %udiv-bb1
+; X86-NEXT:    cmpl $512, %edx # imm = 0x200
+; X86-NEXT:    jae .LBB5_4673
+; X86-NEXT:  .LBB5_4672:
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4673: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_4675
+; X86-NEXT:  # %bb.4674: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_4675: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_4677
+; X86-NEXT:  # %bb.4676: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4677: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_4679
+; X86-NEXT:  # %bb.4678: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_4679: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_4681
+; X86-NEXT:  # %bb.4680: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_4681: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_4683
+; X86-NEXT:  # %bb.4682: # %udiv-bb1
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_4683: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_4685
+; X86-NEXT:  # %bb.4684: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_4685: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_4687
+; X86-NEXT:  # %bb.4686: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4687: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_4689
+; X86-NEXT:  # %bb.4688: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4689: # %udiv-bb1
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_4691
+; X86-NEXT:  # %bb.4690: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4691: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    je .LBB5_4692
+; X86-NEXT:  # %bb.4693: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4694
+; X86-NEXT:  .LBB5_4695: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_4696
+; X86-NEXT:  .LBB5_4697: # %udiv-bb1
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_4699
+; X86-NEXT:  .LBB5_4698: # %udiv-bb1
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_4699: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4701
+; X86-NEXT:  # %bb.4700: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4701: # %udiv-bb1
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4703
+; X86-NEXT:  # %bb.4702: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4703: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4705
+; X86-NEXT:  # %bb.4704: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4705: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4707
+; X86-NEXT:  # %bb.4706: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4707: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4709
+; X86-NEXT:  # %bb.4708: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4709: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4711
+; X86-NEXT:  # %bb.4710: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4711: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4713
+; X86-NEXT:  # %bb.4712: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4713: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_4715
+; X86-NEXT:  # %bb.4714: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4715: # %udiv-bb1
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4717
+; X86-NEXT:  # %bb.4716:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4717: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4719
+; X86-NEXT:  # %bb.4718:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4719: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4721
+; X86-NEXT:  # %bb.4720:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4721: # %udiv-bb1
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_4723
+; X86-NEXT:  # %bb.4722:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4723: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4725
+; X86-NEXT:  # %bb.4724: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4725: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    je .LBB5_4727
+; X86-NEXT:  # %bb.4726: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_4727: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_4728
+; X86-NEXT:  # %bb.4729: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_4730
+; X86-NEXT:  .LBB5_4731: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4733
+; X86-NEXT:  .LBB5_4732:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_4733: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4735
+; X86-NEXT:  # %bb.4734: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_4735: # %udiv-bb1
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4737
+; X86-NEXT:  # %bb.4736:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_4737: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4739
+; X86-NEXT:  # %bb.4738:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4739: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    je .LBB5_4741
+; X86-NEXT:  # %bb.4740: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_4741: # %udiv-bb1
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    jne .LBB5_4742
+; X86-NEXT:  # %bb.4743: # %udiv-bb1
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4744
+; X86-NEXT:  .LBB5_4745: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_4747
+; X86-NEXT:  .LBB5_4746:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:  .LBB5_4747: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_4749
+; X86-NEXT:  # %bb.4748:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_4749: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    je .LBB5_4751
+; X86-NEXT:  # %bb.4750: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4751: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    je .LBB5_4753
+; X86-NEXT:  # %bb.4752: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB5_4753: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_4755
+; X86-NEXT:  # %bb.4754:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_4755: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %esi # imm = 0x100
+; X86-NEXT:    jae .LBB5_4757
+; X86-NEXT:  # %bb.4756:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_4757: # %udiv-bb1
+; X86-NEXT:    cmpl $0, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    je .LBB5_4759
+; X86-NEXT:  # %bb.4758: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4759: # %udiv-bb1
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4761
+; X86-NEXT:  # %bb.4760: # %udiv-bb1
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_4761: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $512, %ecx # imm = 0x200
+; X86-NEXT:    jae .LBB5_4763
+; X86-NEXT:  # %bb.4762:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_4763: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %esi # imm = 0x100
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4765
+; X86-NEXT:  # %bb.4764:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:  .LBB5_4765: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4767
+; X86-NEXT:  # %bb.4766: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4767: # %udiv-bb1
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4769
+; X86-NEXT:  # %bb.4768: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_4769: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_4771
+; X86-NEXT:  # %bb.4770:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_4771: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %esi # imm = 0x100
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4773
+; X86-NEXT:  # %bb.4772:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4773: # %udiv-bb1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    je .LBB5_4775
+; X86-NEXT:  # %bb.4774: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4775: # %udiv-bb1
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_4777
+; X86-NEXT:  # %bb.4776: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_4777: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_4779
+; X86-NEXT:  # %bb.4778:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_4779: # %udiv-bb1
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $256, %esi # imm = 0x100
+; X86-NEXT:    jae .LBB5_4781
+; X86-NEXT:  # %bb.4780:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4781: # %udiv-bb1
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_4783
+; X86-NEXT:  # %bb.4782: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_4783: # %udiv-bb1
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_4785
+; X86-NEXT:  # %bb.4784: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_4785: # %udiv-bb1
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_4787
+; X86-NEXT:  # %bb.4786:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB5_4787: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_4789
+; X86-NEXT:  # %bb.4788: # %udiv-bb1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_4789: # %udiv-bb1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_4791
+; X86-NEXT:  # %bb.4790: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4791: # %udiv-bb1
+; X86-NEXT:    cmpl $256, %esi # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jb .LBB5_4793
+; X86-NEXT:  # %bb.4792: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4793: # %udiv-bb1
+; X86-NEXT:    cmpl $512, %esi # imm = 0x200
+; X86-NEXT:    jb .LBB5_4795
+; X86-NEXT:  # %bb.4794: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_4795: # %udiv-bb1
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $511, %eax # imm = 0x1FF
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebp, %edi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    je .LBB5_4796
+; X86-NEXT:  # %bb.174: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_176
+; X86-NEXT:  # %bb.175: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_176: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_178
+; X86-NEXT:  # %bb.177: # %udiv-preheader
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_178: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_180
+; X86-NEXT:  # %bb.179: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_180: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_182
+; X86-NEXT:  # %bb.181: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_182: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_184
+; X86-NEXT:  # %bb.183: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_184: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_186
+; X86-NEXT:  # %bb.185: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_186: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_188
+; X86-NEXT:  # %bb.187: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_188: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_190
+; X86-NEXT:  # %bb.189: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB5_190: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_192
+; X86-NEXT:  # %bb.191: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB5_192: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_194
+; X86-NEXT:  # %bb.193: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_194: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_196
+; X86-NEXT:  # %bb.195: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_196: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_198
+; X86-NEXT:  # %bb.197: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_198: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_200
+; X86-NEXT:  # %bb.199: # %udiv-preheader
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:  .LBB5_200: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_202
+; X86-NEXT:  # %bb.201: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_202: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    jb .LBB5_204
+; X86-NEXT:  # %bb.203: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_204: # %udiv-preheader
+; X86-NEXT:    leal -128(%eax), %ecx
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_206
+; X86-NEXT:  # %bb.205: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_206: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_208
+; X86-NEXT:  # %bb.207: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_208: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_210
+; X86-NEXT:  # %bb.209: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_210: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl $-512, %eax # imm = 0xFE00
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    leal -256(%eax), %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_212
+; X86-NEXT:  # %bb.211: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_212: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_214
+; X86-NEXT:  # %bb.213: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_214: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_216
+; X86-NEXT:  # %bb.215: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_216: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_218
+; X86-NEXT:  # %bb.217: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_218: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    subl $128, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_220
+; X86-NEXT:  # %bb.219: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_220: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    leal -512(%ebx), %ecx
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_222
+; X86-NEXT:  # %bb.221: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_222: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    jb .LBB5_224
+; X86-NEXT:  # %bb.223: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_224: # %udiv-preheader
+; X86-NEXT:    movb %al, %ch
+; X86-NEXT:    addb $-64, %ch
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %ebp, %ebx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_226
+; X86-NEXT:  # %bb.225:
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB5_226: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_228
+; X86-NEXT:  # %bb.227:
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_228: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_230
+; X86-NEXT:  # %bb.229:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:  .LBB5_230: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl (%esp), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jae .LBB5_232
+; X86-NEXT:  # %bb.231:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:  .LBB5_232: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_234
+; X86-NEXT:  # %bb.233: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_234: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    leal -384(%eax), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_236
+; X86-NEXT:  # %bb.235: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_236: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_238
+; X86-NEXT:  # %bb.237: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_238: # %udiv-preheader
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_239
+; X86-NEXT:  # %bb.240: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_241
+; X86-NEXT:  .LBB5_4416: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4419
+; X86-NEXT:  .LBB5_4418: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jb .LBB5_4421
+; X86-NEXT:  .LBB5_4420: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_4422
+; X86-NEXT:    jmp .LBB5_4423
+; X86-NEXT:  .LBB5_4454: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4457
+; X86-NEXT:  .LBB5_4456:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4459
+; X86-NEXT:  .LBB5_4458:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_4460
+; X86-NEXT:    jmp .LBB5_4461
+; X86-NEXT:  .LBB5_4544: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_4547
+; X86-NEXT:  .LBB5_4546:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edi
+; X86-NEXT:    jb .LBB5_4548
+; X86-NEXT:    jmp .LBB5_4549
+; X86-NEXT:  .LBB5_4578: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4581
+; X86-NEXT:  .LBB5_4580: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_4582
+; X86-NEXT:    jmp .LBB5_4583
+; X86-NEXT:  .LBB5_4602: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4605
+; X86-NEXT:  .LBB5_4604: # %udiv-bb1
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_4606
+; X86-NEXT:    jmp .LBB5_4607
+; X86-NEXT:  .LBB5_4668: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_4671
+; X86-NEXT:  .LBB5_4670: # %udiv-bb1
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $512, %edx # imm = 0x200
+; X86-NEXT:    jb .LBB5_4672
+; X86-NEXT:    jmp .LBB5_4673
+; X86-NEXT:  .LBB5_4692: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4695
+; X86-NEXT:  .LBB5_4694: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_4697
+; X86-NEXT:  .LBB5_4696: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_4698
+; X86-NEXT:    jmp .LBB5_4699
+; X86-NEXT:  .LBB5_4728: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_4731
+; X86-NEXT:  .LBB5_4730: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_4732
+; X86-NEXT:    jmp .LBB5_4733
+; X86-NEXT:  .LBB5_4742: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_4745
+; X86-NEXT:  .LBB5_4744: # %udiv-bb1
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    cmpl $512, (%esp) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jb .LBB5_4746
+; X86-NEXT:    jmp .LBB5_4747
+; X86-NEXT:  .LBB5_4796:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_173
+; X86-NEXT:  .LBB5_239:
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_241: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_243
+; X86-NEXT:  # %bb.242: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_243: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_245
+; X86-NEXT:  # %bb.244: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_245: # %udiv-preheader
+; X86-NEXT:    leal -256(%eax), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_247
+; X86-NEXT:  # %bb.246: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_247: # %udiv-preheader
+; X86-NEXT:    leal -256(%esi), %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_249
+; X86-NEXT:  # %bb.248: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_249: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_251
+; X86-NEXT:  # %bb.250: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_251: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_253
+; X86-NEXT:  # %bb.252: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_253: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_255
+; X86-NEXT:  # %bb.254: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_255: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_257
+; X86-NEXT:  # %bb.256: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_257: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    subl $128, %eax
+; X86-NEXT:    jb .LBB5_259
+; X86-NEXT:  # %bb.258: # %udiv-preheader
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_259: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    jne .LBB5_261
+; X86-NEXT:  # %bb.260: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_261: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_263
+; X86-NEXT:  # %bb.262: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_263: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_265
+; X86-NEXT:  # %bb.264: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_265: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_267
+; X86-NEXT:  # %bb.266:
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_267: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_268
+; X86-NEXT:  # %bb.269: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_270
+; X86-NEXT:  .LBB5_271: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_272
+; X86-NEXT:  .LBB5_273: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_275
+; X86-NEXT:  .LBB5_274: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_275: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_277
+; X86-NEXT:  # %bb.276: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_277: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_279
+; X86-NEXT:  # %bb.278: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_279: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_281
+; X86-NEXT:  # %bb.280: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_281: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_283
+; X86-NEXT:  # %bb.282:
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_283: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_285
+; X86-NEXT:  # %bb.284: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_285: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_287
+; X86-NEXT:  # %bb.286:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_287: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_289
+; X86-NEXT:  # %bb.288: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_289: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_291
+; X86-NEXT:  # %bb.290: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_291: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_293
+; X86-NEXT:  # %bb.292: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_293: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_295
+; X86-NEXT:  # %bb.294: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_295: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_297
+; X86-NEXT:  # %bb.296: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_297: # %udiv-preheader
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_299
+; X86-NEXT:  # %bb.298: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_299: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_301
+; X86-NEXT:  # %bb.300: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_301: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    jne .LBB5_303
+; X86-NEXT:  # %bb.302: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_303: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_305
+; X86-NEXT:  # %bb.304: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_305: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_307
+; X86-NEXT:  # %bb.306: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_307: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_309
+; X86-NEXT:  # %bb.308: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_309: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_311
+; X86-NEXT:  # %bb.310: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_311: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_313
+; X86-NEXT:  # %bb.312: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_313: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_315
+; X86-NEXT:  # %bb.314: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_315: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_317
+; X86-NEXT:  # %bb.316: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_317: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_319
+; X86-NEXT:  # %bb.318: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_319: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_321
+; X86-NEXT:  # %bb.320: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_321: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_323
+; X86-NEXT:  # %bb.322: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_323: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_325
+; X86-NEXT:  # %bb.324: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_325: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_327
+; X86-NEXT:  # %bb.326: # %udiv-preheader
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_327: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_329
+; X86-NEXT:  # %bb.328: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_329: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_331
+; X86-NEXT:  # %bb.330: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_331: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_333
+; X86-NEXT:  # %bb.332: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_333: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jne .LBB5_335
+; X86-NEXT:  # %bb.334: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_335: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_337
+; X86-NEXT:  # %bb.336: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_337: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_339
+; X86-NEXT:  # %bb.338: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_339: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_341
+; X86-NEXT:  # %bb.340: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_341: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB5_343
+; X86-NEXT:  # %bb.342: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_343: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_345
+; X86-NEXT:  # %bb.344: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_345: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_347
+; X86-NEXT:  # %bb.346: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_347: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_349
+; X86-NEXT:  # %bb.348: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_349: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_350
+; X86-NEXT:  # %bb.351: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_352
+; X86-NEXT:  .LBB5_353: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_355
+; X86-NEXT:  .LBB5_354:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_355: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_357
+; X86-NEXT:  # %bb.356: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB5_357: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $384, %eax # imm = 0x180
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_359
+; X86-NEXT:  # %bb.358: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_359: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_361
+; X86-NEXT:  # %bb.360: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB5_361: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_363
+; X86-NEXT:  # %bb.362:
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_363: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_365
+; X86-NEXT:  # %bb.364:
+; X86-NEXT:    orl %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_365: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_367
+; X86-NEXT:  # %bb.366: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_367: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    jne .LBB5_369
+; X86-NEXT:  # %bb.368: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_369: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_371
+; X86-NEXT:  # %bb.370:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_371: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_373
+; X86-NEXT:  # %bb.372: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_373: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    jne .LBB5_375
+; X86-NEXT:  # %bb.374: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_375: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_377
+; X86-NEXT:  # %bb.376:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_377: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    je .LBB5_379
+; X86-NEXT:  # %bb.378: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_379: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_381
+; X86-NEXT:  # %bb.380:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_381: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %ebp
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_383
+; X86-NEXT:  # %bb.382: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB5_383: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_385
+; X86-NEXT:  # %bb.384: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_385: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_387
+; X86-NEXT:  # %bb.386: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_387: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_389
+; X86-NEXT:  # %bb.388: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_389: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    jne .LBB5_391
+; X86-NEXT:  # %bb.390: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_391: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_393
+; X86-NEXT:  # %bb.392: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_393: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_395
+; X86-NEXT:  # %bb.394: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_395: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_397
+; X86-NEXT:  # %bb.396: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_397: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_399
+; X86-NEXT:  # %bb.398: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_399: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_401
+; X86-NEXT:  # %bb.400: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_401: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_403
+; X86-NEXT:  # %bb.402: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_403: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_405
+; X86-NEXT:  # %bb.404: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_405: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_407
+; X86-NEXT:  # %bb.406: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_407: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_409
+; X86-NEXT:  # %bb.408: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_409: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_411
+; X86-NEXT:  # %bb.410: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_411: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jae .LBB5_413
+; X86-NEXT:  # %bb.412:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_413: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_415
+; X86-NEXT:  # %bb.414: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_415: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_416
+; X86-NEXT:  # %bb.417: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_418
+; X86-NEXT:  .LBB5_419: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_420
+; X86-NEXT:  .LBB5_421: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_423
+; X86-NEXT:  .LBB5_422:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_423: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_425
+; X86-NEXT:  # %bb.424:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_425: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_427
+; X86-NEXT:  # %bb.426: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_427: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_429
+; X86-NEXT:  # %bb.428: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_429: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_431
+; X86-NEXT:  # %bb.430: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_431: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl $384, %ecx # imm = 0x180
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_433
+; X86-NEXT:  # %bb.432: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_433: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_435
+; X86-NEXT:  # %bb.434: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_435: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_437
+; X86-NEXT:  # %bb.436: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_437: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_439
+; X86-NEXT:  # %bb.438: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_439: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_441
+; X86-NEXT:  # %bb.440: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_441: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_443
+; X86-NEXT:  # %bb.442: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_443: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_445
+; X86-NEXT:  # %bb.444: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_445: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_447
+; X86-NEXT:  # %bb.446: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_447: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_449
+; X86-NEXT:  # %bb.448: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_449: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_451
+; X86-NEXT:  # %bb.450: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_451: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_453
+; X86-NEXT:  # %bb.452: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_453: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_455
+; X86-NEXT:  # %bb.454: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_455: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_457
+; X86-NEXT:  # %bb.456: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_457: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_459
+; X86-NEXT:  # %bb.458: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_459: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_461
+; X86-NEXT:  # %bb.460: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_461: # %udiv-preheader
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_463
+; X86-NEXT:  # %bb.462: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_463: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_465
+; X86-NEXT:  # %bb.464: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_465: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_467
+; X86-NEXT:  # %bb.466: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_467: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %edi
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_469
+; X86-NEXT:  # %bb.468: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_469: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_471
+; X86-NEXT:  # %bb.470: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_471: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_473
+; X86-NEXT:  # %bb.472: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_473: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_475
+; X86-NEXT:  # %bb.474: # %udiv-preheader
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB5_475: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_477
+; X86-NEXT:  # %bb.476: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_477: # %udiv-preheader
+; X86-NEXT:    leal -640(%eax), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_479
+; X86-NEXT:  # %bb.478: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_479: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edi
+; X86-NEXT:    jb .LBB5_481
+; X86-NEXT:  # %bb.480: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_481: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_483
+; X86-NEXT:  # %bb.482: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_483: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_485
+; X86-NEXT:  # %bb.484: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_485: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_487
+; X86-NEXT:  # %bb.486: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_487: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_489
+; X86-NEXT:  # %bb.488: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_489: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_491
+; X86-NEXT:  # %bb.490: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_491: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edi
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_492
+; X86-NEXT:  # %bb.493: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_494
+; X86-NEXT:  .LBB5_268: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_271
+; X86-NEXT:  .LBB5_270: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_273
+; X86-NEXT:  .LBB5_272:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_274
+; X86-NEXT:    jmp .LBB5_275
+; X86-NEXT:  .LBB5_350: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_353
+; X86-NEXT:  .LBB5_352:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_354
+; X86-NEXT:    jmp .LBB5_355
+; X86-NEXT:  .LBB5_416:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_419
+; X86-NEXT:  .LBB5_418: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_421
+; X86-NEXT:  .LBB5_420:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_422
+; X86-NEXT:    jmp .LBB5_423
+; X86-NEXT:  .LBB5_492:
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_494: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_496
+; X86-NEXT:  # %bb.495: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_496: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_498
+; X86-NEXT:  # %bb.497: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_498: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_500
+; X86-NEXT:  # %bb.499: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_500: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_502
+; X86-NEXT:  # %bb.501:
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_502: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    jne .LBB5_503
+; X86-NEXT:  # %bb.504: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_505
+; X86-NEXT:  .LBB5_506: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_508
+; X86-NEXT:  .LBB5_507:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_508: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_510
+; X86-NEXT:  # %bb.509: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_510: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_511
+; X86-NEXT:  # %bb.512: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_513
+; X86-NEXT:  .LBB5_503: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_506
+; X86-NEXT:  .LBB5_505: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_507
+; X86-NEXT:    jmp .LBB5_508
+; X86-NEXT:  .LBB5_511:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_513: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_515
+; X86-NEXT:  # %bb.514: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_515: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_517
+; X86-NEXT:  # %bb.516: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_517: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_519
+; X86-NEXT:  # %bb.518: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_519: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_521
+; X86-NEXT:  # %bb.520: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_521: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_523
+; X86-NEXT:  # %bb.522: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_523: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_525
+; X86-NEXT:  # %bb.524: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_525: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_527
+; X86-NEXT:  # %bb.526: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_527: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_529
+; X86-NEXT:  # %bb.528: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_529: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_530
+; X86-NEXT:  # %bb.531: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_532
+; X86-NEXT:  .LBB5_533: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_534
+; X86-NEXT:  .LBB5_535: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_537
+; X86-NEXT:  .LBB5_536: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_537: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    subl $128, %ecx
+; X86-NEXT:    jb .LBB5_539
+; X86-NEXT:  # %bb.538: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_539: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jb .LBB5_541
+; X86-NEXT:  # %bb.540: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_541: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_543
+; X86-NEXT:  # %bb.542: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_543: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_545
+; X86-NEXT:  # %bb.544: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_545: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_547
+; X86-NEXT:  # %bb.546:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_547: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_549
+; X86-NEXT:  # %bb.548: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_549: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %ebx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_551
+; X86-NEXT:  # %bb.550: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB5_551: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_553
+; X86-NEXT:  # %bb.552:
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_553: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_555
+; X86-NEXT:  # %bb.554: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_555: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_557
+; X86-NEXT:  # %bb.556: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_557: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_559
+; X86-NEXT:  # %bb.558: # %udiv-preheader
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_559: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_561
+; X86-NEXT:  # %bb.560: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_561: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_563
+; X86-NEXT:  # %bb.562: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_563: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_565
+; X86-NEXT:  # %bb.564: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_565: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_567
+; X86-NEXT:  # %bb.566: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_567: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_569
+; X86-NEXT:  # %bb.568:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_569: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_571
+; X86-NEXT:  # %bb.570: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_571: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    jne .LBB5_573
+; X86-NEXT:  # %bb.572: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_573: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_575
+; X86-NEXT:  # %bb.574: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_575: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_577
+; X86-NEXT:  # %bb.576: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_577: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    leal -768(%esi), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_579
+; X86-NEXT:  # %bb.578: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_579: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %esi # imm = 0x100
+; X86-NEXT:    jb .LBB5_581
+; X86-NEXT:  # %bb.580: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_581: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %esi
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_583
+; X86-NEXT:  # %bb.582:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_583: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_585
+; X86-NEXT:  # %bb.584: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB5_585: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_587
+; X86-NEXT:  # %bb.586: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_587: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_589
+; X86-NEXT:  # %bb.588: # %udiv-preheader
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB5_589: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_591
+; X86-NEXT:  # %bb.590: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_591: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_593
+; X86-NEXT:  # %bb.592: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_593: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_595
+; X86-NEXT:  # %bb.594: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_595: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_597
+; X86-NEXT:  # %bb.596: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_597: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    je .LBB5_599
+; X86-NEXT:  # %bb.598: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_599: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_601
+; X86-NEXT:  # %bb.600: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_601: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_603
+; X86-NEXT:  # %bb.602: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_603: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_605
+; X86-NEXT:  # %bb.604: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_605: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_607
+; X86-NEXT:  # %bb.606: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_607: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_609
+; X86-NEXT:  # %bb.608: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_609: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_611
+; X86-NEXT:  # %bb.610: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_611: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    jne .LBB5_613
+; X86-NEXT:  # %bb.612: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_613: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $256, %ecx # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_615
+; X86-NEXT:  # %bb.614: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_615: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_617
+; X86-NEXT:  # %bb.616:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_617: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_619
+; X86-NEXT:  # %bb.618: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_619: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_620
+; X86-NEXT:  # %bb.621: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_622
+; X86-NEXT:  .LBB5_623: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_624
+; X86-NEXT:  .LBB5_625: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_626
+; X86-NEXT:  .LBB5_627: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB5_629
+; X86-NEXT:  .LBB5_628: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_629: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_631
+; X86-NEXT:  # %bb.630: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_631: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    jne .LBB5_633
+; X86-NEXT:  # %bb.632: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_633: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %esi
+; X86-NEXT:    jb .LBB5_634
+; X86-NEXT:  # %bb.635: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_636
+; X86-NEXT:  .LBB5_530: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_533
+; X86-NEXT:  .LBB5_532: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_535
+; X86-NEXT:  .LBB5_534: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_536
+; X86-NEXT:    jmp .LBB5_537
+; X86-NEXT:  .LBB5_620: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_623
+; X86-NEXT:  .LBB5_622: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_625
+; X86-NEXT:  .LBB5_624:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_627
+; X86-NEXT:  .LBB5_626:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB5_628
+; X86-NEXT:    jmp .LBB5_629
+; X86-NEXT:  .LBB5_634:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:  .LBB5_636: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_638
+; X86-NEXT:  # %bb.637: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_638: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    je .LBB5_640
+; X86-NEXT:  # %bb.639: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_640: # %udiv-preheader
+; X86-NEXT:    leal -128(%esi), %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %ebx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_642
+; X86-NEXT:  # %bb.641: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:  .LBB5_642: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_644
+; X86-NEXT:  # %bb.643: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB5_644: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_646
+; X86-NEXT:  # %bb.645:
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB5_646: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_648
+; X86-NEXT:  # %bb.647: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_648: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jae .LBB5_650
+; X86-NEXT:  # %bb.649:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_650: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    je .LBB5_652
+; X86-NEXT:  # %bb.651: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_652: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_654
+; X86-NEXT:  # %bb.653:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:  .LBB5_654: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_656
+; X86-NEXT:  # %bb.655: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_656: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    je .LBB5_658
+; X86-NEXT:  # %bb.657: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_658: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_660
+; X86-NEXT:  # %bb.659: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_660: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    jne .LBB5_661
+; X86-NEXT:  # %bb.662: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_663
+; X86-NEXT:  .LBB5_664: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_666
+; X86-NEXT:  .LBB5_665:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_666: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_668
+; X86-NEXT:  # %bb.667: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_668: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_670
+; X86-NEXT:  # %bb.669: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_670: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_672
+; X86-NEXT:  # %bb.671: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_672: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_674
+; X86-NEXT:  # %bb.673: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_674: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jb .LBB5_676
+; X86-NEXT:  # %bb.675: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_676: # %udiv-preheader
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_678
+; X86-NEXT:  # %bb.677: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_678: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_680
+; X86-NEXT:  # %bb.679: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_680: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_682
+; X86-NEXT:  # %bb.681: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_682: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_684
+; X86-NEXT:  # %bb.683: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_684: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_686
+; X86-NEXT:  # %bb.685: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_686: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_688
+; X86-NEXT:  # %bb.687: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_688: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %ebp
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_690
+; X86-NEXT:  # %bb.689: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_690: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_692
+; X86-NEXT:  # %bb.691: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_692: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jae .LBB5_694
+; X86-NEXT:  # %bb.693:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:  .LBB5_694: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_696
+; X86-NEXT:  # %bb.695:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_696: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_698
+; X86-NEXT:  # %bb.697: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_698: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_700
+; X86-NEXT:  # %bb.699:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_700: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_702
+; X86-NEXT:  # %bb.701:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_702: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    je .LBB5_704
+; X86-NEXT:  # %bb.703: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_704: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_706
+; X86-NEXT:  # %bb.705: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_706: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_708
+; X86-NEXT:  # %bb.707: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_708: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_710
+; X86-NEXT:  # %bb.709: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_710: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jae .LBB5_712
+; X86-NEXT:  # %bb.711:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_712: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_714
+; X86-NEXT:  # %bb.713:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_714: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    je .LBB5_715
+; X86-NEXT:  # %bb.716: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_717
+; X86-NEXT:  .LBB5_718: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_720
+; X86-NEXT:  .LBB5_719: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_720: # %udiv-preheader
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_722
+; X86-NEXT:  # %bb.721: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_722: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_724
+; X86-NEXT:  # %bb.723: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_724: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_726
+; X86-NEXT:  # %bb.725: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_726: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_728
+; X86-NEXT:  # %bb.727: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_728: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_730
+; X86-NEXT:  # %bb.729: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_730: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_732
+; X86-NEXT:  # %bb.731: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_732: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_734
+; X86-NEXT:  # %bb.733: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_734: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_736
+; X86-NEXT:  # %bb.735: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_736: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_738
+; X86-NEXT:  # %bb.737: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_738: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_740
+; X86-NEXT:  # %bb.739: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_740: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_742
+; X86-NEXT:  # %bb.741: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_742: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_744
+; X86-NEXT:  # %bb.743:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:  .LBB5_744: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_746
+; X86-NEXT:  # %bb.745: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_746: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_748
+; X86-NEXT:  # %bb.747:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_748: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_750
+; X86-NEXT:  # %bb.749:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:  .LBB5_750: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    je .LBB5_752
+; X86-NEXT:  # %bb.751: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_752: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_754
+; X86-NEXT:  # %bb.753: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB5_754: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_756
+; X86-NEXT:  # %bb.755: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_756: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_758
+; X86-NEXT:  # %bb.757: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_758: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_759
+; X86-NEXT:  # %bb.760: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    jmp .LBB5_761
+; X86-NEXT:  .LBB5_661: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_664
+; X86-NEXT:  .LBB5_663:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_665
+; X86-NEXT:    jmp .LBB5_666
+; X86-NEXT:  .LBB5_715: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_718
+; X86-NEXT:  .LBB5_717: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_719
+; X86-NEXT:    jmp .LBB5_720
+; X86-NEXT:  .LBB5_759:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_761: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_763
+; X86-NEXT:  # %bb.762: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_763: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_765
+; X86-NEXT:  # %bb.764: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_765: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_767
+; X86-NEXT:  # %bb.766: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_767: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_769
+; X86-NEXT:  # %bb.768: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_769: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_771
+; X86-NEXT:  # %bb.770: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_771: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_773
+; X86-NEXT:  # %bb.772:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_773: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_775
+; X86-NEXT:  # %bb.774:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_775: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_777
+; X86-NEXT:  # %bb.776: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_777: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_779
+; X86-NEXT:  # %bb.778: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_779: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_780
+; X86-NEXT:  # %bb.781: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_782
+; X86-NEXT:  .LBB5_780:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_782: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_784
+; X86-NEXT:  # %bb.783: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_784: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_786
+; X86-NEXT:  # %bb.785: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_786: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_788
+; X86-NEXT:  # %bb.787: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_788: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jae .LBB5_789
+; X86-NEXT:  # %bb.790: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_791
+; X86-NEXT:  .LBB5_792: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_794
+; X86-NEXT:  .LBB5_793: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_794: # %udiv-preheader
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_796
+; X86-NEXT:  # %bb.795: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_796: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_798
+; X86-NEXT:  # %bb.797: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_798: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_800
+; X86-NEXT:  # %bb.799: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_800: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_802
+; X86-NEXT:  # %bb.801: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_802: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_804
+; X86-NEXT:  # %bb.803: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_804: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_806
+; X86-NEXT:  # %bb.805: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_806: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_808
+; X86-NEXT:  # %bb.807: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_808: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_810
+; X86-NEXT:  # %bb.809: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_810: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_812
+; X86-NEXT:  # %bb.811: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_812: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_814
+; X86-NEXT:  # %bb.813: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_814: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    jne .LBB5_816
+; X86-NEXT:  # %bb.815: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_816: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_818
+; X86-NEXT:  # %bb.817: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_818: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_820
+; X86-NEXT:  # %bb.819: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_820: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_822
+; X86-NEXT:  # %bb.821: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_822: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_824
+; X86-NEXT:  # %bb.823: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_824: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_826
+; X86-NEXT:  # %bb.825: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_826: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_828
+; X86-NEXT:  # %bb.827: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_828: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %ebx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_830
+; X86-NEXT:  # %bb.829: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_830: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_832
+; X86-NEXT:  # %bb.831: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_832: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %ebx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_834
+; X86-NEXT:  # %bb.833: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_834: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %ebx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_836
+; X86-NEXT:  # %bb.835: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_836: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %ebx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_838
+; X86-NEXT:  # %bb.837: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_838: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %ebx
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    jne .LBB5_840
+; X86-NEXT:  # %bb.839: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_840: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_842
+; X86-NEXT:  # %bb.841:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_842: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_844
+; X86-NEXT:  # %bb.843: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_844: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_846
+; X86-NEXT:  # %bb.845: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_846: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_848
+; X86-NEXT:  # %bb.847:
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_848: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_850
+; X86-NEXT:  # %bb.849:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_850: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_852
+; X86-NEXT:  # %bb.851:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_852: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_854
+; X86-NEXT:  # %bb.853: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_854: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_856
+; X86-NEXT:  # %bb.855:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_856: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_858
+; X86-NEXT:  # %bb.857: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_858: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_860
+; X86-NEXT:  # %bb.859: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_860: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_862
+; X86-NEXT:  # %bb.861: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_862: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ecx # imm = 0x100
+; X86-NEXT:    jb .LBB5_864
+; X86-NEXT:  # %bb.863: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_864: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_866
+; X86-NEXT:  # %bb.865: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_866: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_868
+; X86-NEXT:  # %bb.867: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_868: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_870
+; X86-NEXT:  # %bb.869: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_870: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_872
+; X86-NEXT:  # %bb.871: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_872: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_874
+; X86-NEXT:  # %bb.873:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_874: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_876
+; X86-NEXT:  # %bb.875: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_876: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jb .LBB5_878
+; X86-NEXT:  # %bb.877: # %udiv-preheader
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB5_878: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_880
+; X86-NEXT:  # %bb.879: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_880: # %udiv-preheader
+; X86-NEXT:    movl $768, %ecx # imm = 0x300
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    leal -128(%ecx), %ebx
+; X86-NEXT:    negl %ebx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_882
+; X86-NEXT:  # %bb.881: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_882: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %edi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_884
+; X86-NEXT:  # %bb.883: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB5_884: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_886
+; X86-NEXT:  # %bb.885: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_886: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_888
+; X86-NEXT:  # %bb.887:
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_888: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    je .LBB5_890
+; X86-NEXT:  # %bb.889: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_890: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_892
+; X86-NEXT:  # %bb.891: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_892: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_894
+; X86-NEXT:  # %bb.893: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_894: # %udiv-preheader
+; X86-NEXT:    movl $640, %ecx # imm = 0x280
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_896
+; X86-NEXT:  # %bb.895: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_896: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_898
+; X86-NEXT:  # %bb.897: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_898: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_900
+; X86-NEXT:  # %bb.899:
+; X86-NEXT:    orl %edx, %edi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_900: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_902
+; X86-NEXT:  # %bb.901: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB5_902: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %ebp
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jne .LBB5_904
+; X86-NEXT:  # %bb.903: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_904: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_906
+; X86-NEXT:  # %bb.905:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:  .LBB5_906: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    je .LBB5_908
+; X86-NEXT:  # %bb.907: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB5_908: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_910
+; X86-NEXT:  # %bb.909: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_910: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_912
+; X86-NEXT:  # %bb.911: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_912: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jae .LBB5_914
+; X86-NEXT:  # %bb.913:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_914: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_916
+; X86-NEXT:  # %bb.915: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_916: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:    jb .LBB5_918
+; X86-NEXT:  # %bb.917: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_918: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_920
+; X86-NEXT:  # %bb.919: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_920: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_922
+; X86-NEXT:  # %bb.921: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_922: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_924
+; X86-NEXT:  # %bb.923: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_924: # %udiv-preheader
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_926
+; X86-NEXT:  # %bb.925: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_926: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_928
+; X86-NEXT:  # %bb.927: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_928: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_930
+; X86-NEXT:  # %bb.929: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_930: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_932
+; X86-NEXT:  # %bb.931: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_932: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_934
+; X86-NEXT:  # %bb.933: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_934: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_936
+; X86-NEXT:  # %bb.935: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_936: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_938
+; X86-NEXT:  # %bb.937:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_938: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_940
+; X86-NEXT:  # %bb.939: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_940: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_942
+; X86-NEXT:  # %bb.941: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_942: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_943
+; X86-NEXT:  # %bb.944: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_945
+; X86-NEXT:  .LBB5_946: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_947
+; X86-NEXT:  .LBB5_948: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_949
+; X86-NEXT:  .LBB5_950: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_952
+; X86-NEXT:  .LBB5_951: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_952: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_954
+; X86-NEXT:  # %bb.953: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_954: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_956
+; X86-NEXT:  # %bb.955:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_956: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_958
+; X86-NEXT:  # %bb.957: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_958: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_960
+; X86-NEXT:  # %bb.959: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_960: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_962
+; X86-NEXT:  # %bb.961:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_962: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_964
+; X86-NEXT:  # %bb.963:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_964: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    jne .LBB5_965
+; X86-NEXT:  # %bb.966: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_967
+; X86-NEXT:  .LBB5_968: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_969
+; X86-NEXT:  .LBB5_970: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_971
+; X86-NEXT:  .LBB5_972: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_973
+; X86-NEXT:  .LBB5_789: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_792
+; X86-NEXT:  .LBB5_791: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_793
+; X86-NEXT:    jmp .LBB5_794
+; X86-NEXT:  .LBB5_943: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_946
+; X86-NEXT:  .LBB5_945: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_948
+; X86-NEXT:  .LBB5_947: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_950
+; X86-NEXT:  .LBB5_949: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_951
+; X86-NEXT:    jmp .LBB5_952
+; X86-NEXT:  .LBB5_965: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_968
+; X86-NEXT:  .LBB5_967: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_970
+; X86-NEXT:  .LBB5_969: # %udiv-preheader
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_972
+; X86-NEXT:  .LBB5_971:
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_973: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_975
+; X86-NEXT:  # %bb.974: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_975: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_977
+; X86-NEXT:  # %bb.976: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_977: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_979
+; X86-NEXT:  # %bb.978: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_979: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_981
+; X86-NEXT:  # %bb.980: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_981: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_983
+; X86-NEXT:  # %bb.982: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_983: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shrdl %cl, %ebp, %esi
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_985
+; X86-NEXT:  # %bb.984: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_985: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_987
+; X86-NEXT:  # %bb.986: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_987: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    je .LBB5_988
+; X86-NEXT:  # %bb.989: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_990
+; X86-NEXT:  .LBB5_991: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_992
+; X86-NEXT:  .LBB5_993: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_994
+; X86-NEXT:  .LBB5_995: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_997
+; X86-NEXT:  .LBB5_996: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_997: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %ebx
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    jne .LBB5_999
+; X86-NEXT:  # %bb.998: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_999: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %ebp
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1001
+; X86-NEXT:  # %bb.1000: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB5_1001: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %ebp
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1003
+; X86-NEXT:  # %bb.1002: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB5_1003: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1005
+; X86-NEXT:  # %bb.1004:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1005: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1007
+; X86-NEXT:  # %bb.1006:
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_1007: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_1009
+; X86-NEXT:  # %bb.1008:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1009: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1011
+; X86-NEXT:  # %bb.1010: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_1011: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1013
+; X86-NEXT:  # %bb.1012: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_1013: # %udiv-preheader
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1015
+; X86-NEXT:  # %bb.1014: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_1015: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_1017
+; X86-NEXT:  # %bb.1016:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1017: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1019
+; X86-NEXT:  # %bb.1018:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1019: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_1021
+; X86-NEXT:  # %bb.1020: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_1021: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1023
+; X86-NEXT:  # %bb.1022: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1023: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_1025
+; X86-NEXT:  # %bb.1024: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1025: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1027
+; X86-NEXT:  # %bb.1026:
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1027: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1029
+; X86-NEXT:  # %bb.1028: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_1029: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1031
+; X86-NEXT:  # %bb.1030: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_1031: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1033
+; X86-NEXT:  # %bb.1032: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1033: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    jne .LBB5_1035
+; X86-NEXT:  # %bb.1034: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_1035: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1037
+; X86-NEXT:  # %bb.1036: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1037: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1039
+; X86-NEXT:  # %bb.1038: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_1039: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1041
+; X86-NEXT:  # %bb.1040: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1041: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1043
+; X86-NEXT:  # %bb.1042: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_1043: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB5_1045
+; X86-NEXT:  # %bb.1044: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1045: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_1047
+; X86-NEXT:  # %bb.1046: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_1047: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_1048
+; X86-NEXT:  # %bb.1049: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1050
+; X86-NEXT:  .LBB5_1051: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_1053
+; X86-NEXT:  .LBB5_1052: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1053: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jb .LBB5_1055
+; X86-NEXT:  # %bb.1054: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1055: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1057
+; X86-NEXT:  # %bb.1056: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_1057: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1059
+; X86-NEXT:  # %bb.1058: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1059: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_1061
+; X86-NEXT:  # %bb.1060:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1061: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1063
+; X86-NEXT:  # %bb.1062:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1063: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_1065
+; X86-NEXT:  # %bb.1064:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1065: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    je .LBB5_1067
+; X86-NEXT:  # %bb.1066: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1067: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1068
+; X86-NEXT:  # %bb.1069: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_1070
+; X86-NEXT:  .LBB5_1071: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1072
+; X86-NEXT:  .LBB5_1073: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1075
+; X86-NEXT:  .LBB5_1074: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1075: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1077
+; X86-NEXT:  # %bb.1076: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1077: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1079
+; X86-NEXT:  # %bb.1078: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1079: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_1081
+; X86-NEXT:  # %bb.1080:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1081: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1083
+; X86-NEXT:  # %bb.1082: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB5_1083: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1085
+; X86-NEXT:  # %bb.1084: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB5_1085: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_1087
+; X86-NEXT:  # %bb.1086:
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_1087: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edi
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1089
+; X86-NEXT:  # %bb.1088: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_1089: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1091
+; X86-NEXT:  # %bb.1090: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_1091: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_1093
+; X86-NEXT:  # %bb.1092: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:  .LBB5_1093: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1095
+; X86-NEXT:  # %bb.1094: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1095: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1097
+; X86-NEXT:  # %bb.1096: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1097: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1099
+; X86-NEXT:  # %bb.1098: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1099: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1101
+; X86-NEXT:  # %bb.1100:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:  .LBB5_1101: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1103
+; X86-NEXT:  # %bb.1102:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1103: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_1105
+; X86-NEXT:  # %bb.1104: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_1105: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1107
+; X86-NEXT:  # %bb.1106:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_1107: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1109
+; X86-NEXT:  # %bb.1108: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1109: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_1111
+; X86-NEXT:  # %bb.1110: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1111: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1113
+; X86-NEXT:  # %bb.1112: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1113: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jae .LBB5_1115
+; X86-NEXT:  # %bb.1114:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1115: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_1117
+; X86-NEXT:  # %bb.1116: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_1117: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_1119
+; X86-NEXT:  # %bb.1118: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1119: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    je .LBB5_1120
+; X86-NEXT:  # %bb.1121: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1122
+; X86-NEXT:  .LBB5_1123: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_1124
+; X86-NEXT:  .LBB5_1125: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1126
+; X86-NEXT:  .LBB5_1127: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1129
+; X86-NEXT:  .LBB5_1128: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1129: # %udiv-preheader
+; X86-NEXT:    leal -896(%eax), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1131
+; X86-NEXT:  # %bb.1130: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_1131: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_1133
+; X86-NEXT:  # %bb.1132: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_1133: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1135
+; X86-NEXT:  # %bb.1134: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1135: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1137
+; X86-NEXT:  # %bb.1136: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1137: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1139
+; X86-NEXT:  # %bb.1138: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1139: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1141
+; X86-NEXT:  # %bb.1140: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB5_1141: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1143
+; X86-NEXT:  # %bb.1142: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1143: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1145
+; X86-NEXT:  # %bb.1144: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_1145: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1147
+; X86-NEXT:  # %bb.1146: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1147: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jb .LBB5_1149
+; X86-NEXT:  # %bb.1148: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1149: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1151
+; X86-NEXT:  # %bb.1150: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1151: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1153
+; X86-NEXT:  # %bb.1152: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1153: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1155
+; X86-NEXT:  # %bb.1154: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1155: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1157
+; X86-NEXT:  # %bb.1156:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1157: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1159
+; X86-NEXT:  # %bb.1158: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1159: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1161
+; X86-NEXT:  # %bb.1160: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_1161: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jb .LBB5_1163
+; X86-NEXT:  # %bb.1162: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1163: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1165
+; X86-NEXT:  # %bb.1164: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1165: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1167
+; X86-NEXT:  # %bb.1166: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1167: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1169
+; X86-NEXT:  # %bb.1168:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1169: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_1171
+; X86-NEXT:  # %bb.1170: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_1171: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1173
+; X86-NEXT:  # %bb.1172: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_1173: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1175
+; X86-NEXT:  # %bb.1174: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_1175: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    jne .LBB5_1177
+; X86-NEXT:  # %bb.1176: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1177: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1179
+; X86-NEXT:  # %bb.1178: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1179: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    jne .LBB5_1181
+; X86-NEXT:  # %bb.1180: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1181: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jne .LBB5_1183
+; X86-NEXT:  # %bb.1182: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1183: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1185
+; X86-NEXT:  # %bb.1184: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_1185: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1187
+; X86-NEXT:  # %bb.1186: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_1187: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1189
+; X86-NEXT:  # %bb.1188: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1189: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1191
+; X86-NEXT:  # %bb.1190: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1191: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1193
+; X86-NEXT:  # %bb.1192: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1193: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1195
+; X86-NEXT:  # %bb.1194: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1195: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1197
+; X86-NEXT:  # %bb.1196: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1197: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1199
+; X86-NEXT:  # %bb.1198: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1199: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1201
+; X86-NEXT:  # %bb.1200: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1201: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1203
+; X86-NEXT:  # %bb.1202: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_1203: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    jne .LBB5_1205
+; X86-NEXT:  # %bb.1204: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1205: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1207
+; X86-NEXT:  # %bb.1206:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1207: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_1209
+; X86-NEXT:  # %bb.1208: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_1209: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1211
+; X86-NEXT:  # %bb.1210: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_1211: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1213
+; X86-NEXT:  # %bb.1212: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1213: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1215
+; X86-NEXT:  # %bb.1214: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB5_1215: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1217
+; X86-NEXT:  # %bb.1216: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_1217: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1219
+; X86-NEXT:  # %bb.1218: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1219: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1221
+; X86-NEXT:  # %bb.1220: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_1221: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1223
+; X86-NEXT:  # %bb.1222: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1223: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1225
+; X86-NEXT:  # %bb.1224: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1225: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1227
+; X86-NEXT:  # %bb.1226: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1227: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1229
+; X86-NEXT:  # %bb.1228: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1229: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_1231
+; X86-NEXT:  # %bb.1230: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1231: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1233
+; X86-NEXT:  # %bb.1232: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_1233: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1235
+; X86-NEXT:  # %bb.1234: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_1235: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    jae .LBB5_1237
+; X86-NEXT:  # %bb.1236:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1237: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1239
+; X86-NEXT:  # %bb.1238: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1239: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_1241
+; X86-NEXT:  # %bb.1240:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1241: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    jne .LBB5_1242
+; X86-NEXT:  # %bb.1243: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_1244
+; X86-NEXT:  .LBB5_1245: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1247
+; X86-NEXT:  .LBB5_1246:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_1247: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1249
+; X86-NEXT:  # %bb.1248: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_1249: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1251
+; X86-NEXT:  # %bb.1250: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1251: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1253
+; X86-NEXT:  # %bb.1252: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1253: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1255
+; X86-NEXT:  # %bb.1254:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:  .LBB5_1255: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1257
+; X86-NEXT:  # %bb.1256: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1257: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1259
+; X86-NEXT:  # %bb.1258: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_1259: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1261
+; X86-NEXT:  # %bb.1260: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1261: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1263
+; X86-NEXT:  # %bb.1262: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB5_1263: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1265
+; X86-NEXT:  # %bb.1264: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1265: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1267
+; X86-NEXT:  # %bb.1266: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_1267: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1269
+; X86-NEXT:  # %bb.1268: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1269: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1271
+; X86-NEXT:  # %bb.1270: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1271: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1272
+; X86-NEXT:  # %bb.1273: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1274
+; X86-NEXT:  .LBB5_1275: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1276
+; X86-NEXT:  .LBB5_1277: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1279
+; X86-NEXT:  .LBB5_1278:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_1279: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1281
+; X86-NEXT:  # %bb.1280:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1281: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1283
+; X86-NEXT:  # %bb.1282: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_1283: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1285
+; X86-NEXT:  # %bb.1284: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1285: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    je .LBB5_1287
+; X86-NEXT:  # %bb.1286: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_1287: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1288
+; X86-NEXT:  # %bb.1289: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1290
+; X86-NEXT:  .LBB5_1291: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1293
+; X86-NEXT:  .LBB5_1292:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_1293: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1295
+; X86-NEXT:  # %bb.1294: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_1295: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    je .LBB5_1297
+; X86-NEXT:  # %bb.1296: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_1297: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1298
+; X86-NEXT:  # %bb.1299: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_1300
+; X86-NEXT:  .LBB5_988: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_991
+; X86-NEXT:  .LBB5_990: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_993
+; X86-NEXT:  .LBB5_992: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_995
+; X86-NEXT:  .LBB5_994: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_996
+; X86-NEXT:    jmp .LBB5_997
+; X86-NEXT:  .LBB5_1048: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1051
+; X86-NEXT:  .LBB5_1050: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_1052
+; X86-NEXT:    jmp .LBB5_1053
+; X86-NEXT:  .LBB5_1068: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1071
+; X86-NEXT:  .LBB5_1070: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1073
+; X86-NEXT:  .LBB5_1072:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1074
+; X86-NEXT:    jmp .LBB5_1075
+; X86-NEXT:  .LBB5_1120: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1123
+; X86-NEXT:  .LBB5_1122: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_1125
+; X86-NEXT:  .LBB5_1124: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1127
+; X86-NEXT:  .LBB5_1126: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1128
+; X86-NEXT:    jmp .LBB5_1129
+; X86-NEXT:  .LBB5_1242: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1245
+; X86-NEXT:  .LBB5_1244: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1246
+; X86-NEXT:    jmp .LBB5_1247
+; X86-NEXT:  .LBB5_1272: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1275
+; X86-NEXT:  .LBB5_1274:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1277
+; X86-NEXT:  .LBB5_1276:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1278
+; X86-NEXT:    jmp .LBB5_1279
+; X86-NEXT:  .LBB5_1288: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1291
+; X86-NEXT:  .LBB5_1290:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1292
+; X86-NEXT:    jmp .LBB5_1293
+; X86-NEXT:  .LBB5_1298:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1300: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB5_1302
+; X86-NEXT:  # %bb.1301: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1302: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1304
+; X86-NEXT:  # %bb.1303: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1304: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB5_1306
+; X86-NEXT:  # %bb.1305: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1306: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1308
+; X86-NEXT:  # %bb.1307: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1308: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1310
+; X86-NEXT:  # %bb.1309: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1310: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_1312
+; X86-NEXT:  # %bb.1311: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1312: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1314
+; X86-NEXT:  # %bb.1313: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_1314: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1316
+; X86-NEXT:  # %bb.1315: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_1316: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1318
+; X86-NEXT:  # %bb.1317: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1318: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1320
+; X86-NEXT:  # %bb.1319: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1320: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1322
+; X86-NEXT:  # %bb.1321: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1322: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1324
+; X86-NEXT:  # %bb.1323: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_1324: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1326
+; X86-NEXT:  # %bb.1325: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1326: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Reload
+; X86-NEXT:    jae .LBB5_1328
+; X86-NEXT:  # %bb.1327:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1328: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1330
+; X86-NEXT:  # %bb.1329: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_1330: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1332
+; X86-NEXT:  # %bb.1331: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1332: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1334
+; X86-NEXT:  # %bb.1333: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1334: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1336
+; X86-NEXT:  # %bb.1335: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1336: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1338
+; X86-NEXT:  # %bb.1337:
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB5_1338: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1340
+; X86-NEXT:  # %bb.1339: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1340: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB5_1342
+; X86-NEXT:  # %bb.1341: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1342: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1344
+; X86-NEXT:  # %bb.1343: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_1344: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1346
+; X86-NEXT:  # %bb.1345: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1346: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_1347
+; X86-NEXT:  # %bb.1348: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1349
+; X86-NEXT:  .LBB5_1350: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1352
+; X86-NEXT:  .LBB5_1351:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1352: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1354
+; X86-NEXT:  # %bb.1353: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1354: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1356
+; X86-NEXT:  # %bb.1355: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1356: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1357
+; X86-NEXT:  # %bb.1358: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_1359
+; X86-NEXT:  .LBB5_1360: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_1361
+; X86-NEXT:  .LBB5_1362: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1363
+; X86-NEXT:  .LBB5_1364: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1365
+; X86-NEXT:  .LBB5_1366: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1368
+; X86-NEXT:  .LBB5_1367:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1368: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_1370
+; X86-NEXT:  # %bb.1369:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1370: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edi
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1372
+; X86-NEXT:  # %bb.1371: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB5_1372: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1374
+; X86-NEXT:  # %bb.1373:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1374: # %udiv-preheader
+; X86-NEXT:    movl $896, %ecx # imm = 0x380
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_1376
+; X86-NEXT:  # %bb.1375:
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_1376: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1378
+; X86-NEXT:  # %bb.1377: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_1378: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edi
+; X86-NEXT:    jae .LBB5_1380
+; X86-NEXT:  # %bb.1379:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1380: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %esi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_1382
+; X86-NEXT:  # %bb.1381:
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_1382: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_1384
+; X86-NEXT:  # %bb.1383:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1384: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1386
+; X86-NEXT:  # %bb.1385:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1386: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1388
+; X86-NEXT:  # %bb.1387:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1388: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1390
+; X86-NEXT:  # %bb.1389: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1390: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    je .LBB5_1392
+; X86-NEXT:  # %bb.1391: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1392: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1394
+; X86-NEXT:  # %bb.1393:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1394: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, %eax # imm = 0x100
+; X86-NEXT:    jae .LBB5_1396
+; X86-NEXT:  # %bb.1395:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1396: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1398
+; X86-NEXT:  # %bb.1397: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1398: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1400
+; X86-NEXT:  # %bb.1399: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1400: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1402
+; X86-NEXT:  # %bb.1401: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1402: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_1404
+; X86-NEXT:  # %bb.1403: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1404: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_1406
+; X86-NEXT:  # %bb.1405:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1406: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1408
+; X86-NEXT:  # %bb.1407: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1408: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1410
+; X86-NEXT:  # %bb.1409:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1410: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1412
+; X86-NEXT:  # %bb.1411: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1412: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_1414
+; X86-NEXT:  # %bb.1413: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1414: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1416
+; X86-NEXT:  # %bb.1415: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1416: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1418
+; X86-NEXT:  # %bb.1417:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1418: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1420
+; X86-NEXT:  # %bb.1419: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1420: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1422
+; X86-NEXT:  # %bb.1421: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1422: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1424
+; X86-NEXT:  # %bb.1423: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1424: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1426
+; X86-NEXT:  # %bb.1425: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_1426: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1428
+; X86-NEXT:  # %bb.1427: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1428: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1430
+; X86-NEXT:  # %bb.1429:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1430: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1432
+; X86-NEXT:  # %bb.1431: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1432: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1434
+; X86-NEXT:  # %bb.1433:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1434: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1436
+; X86-NEXT:  # %bb.1435: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1436: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1438
+; X86-NEXT:  # %bb.1437:
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_1438: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1440
+; X86-NEXT:  # %bb.1439: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1440: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1442
+; X86-NEXT:  # %bb.1441: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1442: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1444
+; X86-NEXT:  # %bb.1443: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_1444: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_1446
+; X86-NEXT:  # %bb.1445: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1446: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_1448
+; X86-NEXT:  # %bb.1447:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1448: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1450
+; X86-NEXT:  # %bb.1449:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:  .LBB5_1450: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1452
+; X86-NEXT:  # %bb.1451: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1452: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1454
+; X86-NEXT:  # %bb.1453: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_1454: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1456
+; X86-NEXT:  # %bb.1455:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1456: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1458
+; X86-NEXT:  # %bb.1457: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1458: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1460
+; X86-NEXT:  # %bb.1459: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1460: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1462
+; X86-NEXT:  # %bb.1461: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_1462: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1464
+; X86-NEXT:  # %bb.1463: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_1464: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1466
+; X86-NEXT:  # %bb.1465:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1466: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1468
+; X86-NEXT:  # %bb.1467: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1468: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1469
+; X86-NEXT:  # %bb.1470: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1473
+; X86-NEXT:  .LBB5_1472: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_1473: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1475
+; X86-NEXT:  # %bb.1474:
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB5_1475: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1477
+; X86-NEXT:  # %bb.1476: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_1477: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1479
+; X86-NEXT:  # %bb.1478: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_1479: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jae .LBB5_1480
+; X86-NEXT:  # %bb.1481: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1482
+; X86-NEXT:  .LBB5_1483: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1485
+; X86-NEXT:  .LBB5_1484: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_1485: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1487
+; X86-NEXT:  # %bb.1486: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_1487: # %udiv-preheader
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1489
+; X86-NEXT:  # %bb.1488: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB5_1489: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1491
+; X86-NEXT:  # %bb.1490: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1491: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1493
+; X86-NEXT:  # %bb.1492: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1493: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1495
+; X86-NEXT:  # %bb.1494:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1495: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1497
+; X86-NEXT:  # %bb.1496:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1497: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1499
+; X86-NEXT:  # %bb.1498:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1499: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1501
+; X86-NEXT:  # %bb.1500: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1501: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1503
+; X86-NEXT:  # %bb.1502:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1503: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1505
+; X86-NEXT:  # %bb.1504: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1505: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1507
+; X86-NEXT:  # %bb.1506: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1507: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    jne .LBB5_1508
+; X86-NEXT:  # %bb.1509: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1510
+; X86-NEXT:  .LBB5_1511: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_1512
+; X86-NEXT:  .LBB5_1513: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1514
+; X86-NEXT:  .LBB5_1515: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1517
+; X86-NEXT:  .LBB5_1516:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB5_1517: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1519
+; X86-NEXT:  # %bb.1518: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_1519: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    je .LBB5_1521
+; X86-NEXT:  # %bb.1520: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_1521: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1522
+; X86-NEXT:  # %bb.1523: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_1524
+; X86-NEXT:  .LBB5_1347:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1350
+; X86-NEXT:  .LBB5_1349:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1351
+; X86-NEXT:    jmp .LBB5_1352
+; X86-NEXT:  .LBB5_1357: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_1360
+; X86-NEXT:  .LBB5_1359: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1362
+; X86-NEXT:  .LBB5_1361: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1364
+; X86-NEXT:  .LBB5_1363:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1366
+; X86-NEXT:  .LBB5_1365:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1367
+; X86-NEXT:    jmp .LBB5_1368
+; X86-NEXT:  .LBB5_1469:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_1472
+; X86-NEXT:    jmp .LBB5_1473
+; X86-NEXT:  .LBB5_1480: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1483
+; X86-NEXT:  .LBB5_1482: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1484
+; X86-NEXT:    jmp .LBB5_1485
+; X86-NEXT:  .LBB5_1508: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1511
+; X86-NEXT:  .LBB5_1510: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1513
+; X86-NEXT:  .LBB5_1512: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1515
+; X86-NEXT:  .LBB5_1514:
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1516
+; X86-NEXT:    jmp .LBB5_1517
+; X86-NEXT:  .LBB5_1522:
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1524: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1526
+; X86-NEXT:  # %bb.1525: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1526: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1528
+; X86-NEXT:  # %bb.1527: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:  .LBB5_1528: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1530
+; X86-NEXT:  # %bb.1529: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_1530: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1532
+; X86-NEXT:  # %bb.1531: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1532: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1534
+; X86-NEXT:  # %bb.1533: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1534: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1536
+; X86-NEXT:  # %bb.1535: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1536: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1538
+; X86-NEXT:  # %bb.1537: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1538: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1540
+; X86-NEXT:  # %bb.1539: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_1540: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1542
+; X86-NEXT:  # %bb.1541: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1542: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1544
+; X86-NEXT:  # %bb.1543: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1544: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_1546
+; X86-NEXT:  # %bb.1545:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1546: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    je .LBB5_1548
+; X86-NEXT:  # %bb.1547: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1548: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_1549
+; X86-NEXT:  # %bb.1550: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_1551
+; X86-NEXT:  .LBB5_1549:
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_1551: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_1553
+; X86-NEXT:  # %bb.1552: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_1553: # %udiv-preheader
+; X86-NEXT:    cmpl $512, %eax # imm = 0x200
+; X86-NEXT:    jb .LBB5_1555
+; X86-NEXT:  # %bb.1554: # %udiv-preheader
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1555: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1557
+; X86-NEXT:  # %bb.1556: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_1557: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1559
+; X86-NEXT:  # %bb.1558: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1559: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1561
+; X86-NEXT:  # %bb.1560: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1561: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_1563
+; X86-NEXT:  # %bb.1562: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:  .LBB5_1563: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    jne .LBB5_1565
+; X86-NEXT:  # %bb.1564: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1565: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1567
+; X86-NEXT:  # %bb.1566: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1567: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1569
+; X86-NEXT:  # %bb.1568: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB5_1569: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_1570
+; X86-NEXT:  # %bb.1571: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1572
+; X86-NEXT:  .LBB5_1573: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_1575
+; X86-NEXT:  .LBB5_1574:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1575: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1577
+; X86-NEXT:  # %bb.1576: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1577: # %udiv-preheader
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_1579
+; X86-NEXT:  # %bb.1578: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_1579: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1581
+; X86-NEXT:  # %bb.1580:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1581: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_1583
+; X86-NEXT:  # %bb.1582: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_1583: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1584
+; X86-NEXT:  # %bb.1585: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1588
+; X86-NEXT:  .LBB5_1587: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB5_1588: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_1590
+; X86-NEXT:  # %bb.1589: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_1590: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1591
+; X86-NEXT:  # %bb.1592: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_1593
+; X86-NEXT:  .LBB5_1594: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1595
+; X86-NEXT:  .LBB5_1596: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1598
+; X86-NEXT:  .LBB5_1597:
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1598: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1600
+; X86-NEXT:  # %bb.1599: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1600: # %udiv-preheader
+; X86-NEXT:    cmpl $512, %eax # imm = 0x200
+; X86-NEXT:    jae .LBB5_1602
+; X86-NEXT:  # %bb.1601:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1602: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1604
+; X86-NEXT:  # %bb.1603: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1604: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1606
+; X86-NEXT:  # %bb.1605: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1606: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_1607
+; X86-NEXT:  # %bb.1608: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1609
+; X86-NEXT:  .LBB5_1610: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1611
+; X86-NEXT:  .LBB5_1612: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1614
+; X86-NEXT:  .LBB5_1613: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1614: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1616
+; X86-NEXT:  # %bb.1615: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1616: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1618
+; X86-NEXT:  # %bb.1617: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_1618: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1620
+; X86-NEXT:  # %bb.1619: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1620: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1622
+; X86-NEXT:  # %bb.1621:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1622: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1624
+; X86-NEXT:  # %bb.1623:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1624: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1626
+; X86-NEXT:  # %bb.1625: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1626: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1628
+; X86-NEXT:  # %bb.1627:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1628: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1630
+; X86-NEXT:  # %bb.1629: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1630: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_1632
+; X86-NEXT:  # %bb.1631: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1632: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_1634
+; X86-NEXT:  # %bb.1633:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1634: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1636
+; X86-NEXT:  # %bb.1635: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1636: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    jne .LBB5_1637
+; X86-NEXT:  # %bb.1638: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1639
+; X86-NEXT:  .LBB5_1640: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1642
+; X86-NEXT:  .LBB5_1641: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1642: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1643
+; X86-NEXT:  # %bb.1644: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1645
+; X86-NEXT:  .LBB5_1646: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1647
+; X86-NEXT:  .LBB5_1648: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_1649
+; X86-NEXT:  .LBB5_1650: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1651
+; X86-NEXT:  .LBB5_1652: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1654
+; X86-NEXT:  .LBB5_1653:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1654: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    je .LBB5_1656
+; X86-NEXT:  # %bb.1655: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1656: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %edi # imm = 0x100
+; X86-NEXT:    jae .LBB5_1658
+; X86-NEXT:  # %bb.1657:
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1658: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_1660
+; X86-NEXT:  # %bb.1659: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_1660: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1662
+; X86-NEXT:  # %bb.1661:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1662: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_1664
+; X86-NEXT:  # %bb.1663: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB5_1664: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1666
+; X86-NEXT:  # %bb.1665: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1666: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1668
+; X86-NEXT:  # %bb.1667: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1668: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1670
+; X86-NEXT:  # %bb.1669: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1670: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1672
+; X86-NEXT:  # %bb.1671: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1672: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1674
+; X86-NEXT:  # %bb.1673: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1674: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1676
+; X86-NEXT:  # %bb.1675: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1676: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jne .LBB5_1678
+; X86-NEXT:  # %bb.1677: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_1678: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_1679
+; X86-NEXT:  # %bb.1680: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_1681
+; X86-NEXT:  .LBB5_1570: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1573
+; X86-NEXT:  .LBB5_1572:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_1574
+; X86-NEXT:    jmp .LBB5_1575
+; X86-NEXT:  .LBB5_1584:
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1587
+; X86-NEXT:    jmp .LBB5_1588
+; X86-NEXT:  .LBB5_1591: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_1594
+; X86-NEXT:  .LBB5_1593: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1596
+; X86-NEXT:  .LBB5_1595: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1597
+; X86-NEXT:    jmp .LBB5_1598
+; X86-NEXT:  .LBB5_1607: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1610
+; X86-NEXT:  .LBB5_1609: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1612
+; X86-NEXT:  .LBB5_1611: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1613
+; X86-NEXT:    jmp .LBB5_1614
+; X86-NEXT:  .LBB5_1637: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1640
+; X86-NEXT:  .LBB5_1639:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1641
+; X86-NEXT:    jmp .LBB5_1642
+; X86-NEXT:  .LBB5_1643: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1646
+; X86-NEXT:  .LBB5_1645: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1648
+; X86-NEXT:  .LBB5_1647: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1650
+; X86-NEXT:  .LBB5_1649: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1652
+; X86-NEXT:  .LBB5_1651: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1653
+; X86-NEXT:    jmp .LBB5_1654
+; X86-NEXT:  .LBB5_1679:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_1681: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    je .LBB5_1683
+; X86-NEXT:  # %bb.1682: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:  .LBB5_1683: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_1685
+; X86-NEXT:  # %bb.1684: # %udiv-preheader
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB5_1685: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1687
+; X86-NEXT:  # %bb.1686: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1687: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_1689
+; X86-NEXT:  # %bb.1688: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_1689: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    je .LBB5_1690
+; X86-NEXT:  # %bb.1691: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1692
+; X86-NEXT:  .LBB5_1693: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1694
+; X86-NEXT:  .LBB5_1695: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1697
+; X86-NEXT:  .LBB5_1696: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1697: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1699
+; X86-NEXT:  # %bb.1698: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1699: # %udiv-preheader
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    jne .LBB5_1701
+; X86-NEXT:  # %bb.1700: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_1701: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1703
+; X86-NEXT:  # %bb.1702:
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB5_1703: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %ebp
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1705
+; X86-NEXT:  # %bb.1704: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:  .LBB5_1705: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_1707
+; X86-NEXT:  # %bb.1706: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_1707: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1709
+; X86-NEXT:  # %bb.1708: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1709: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1710
+; X86-NEXT:  # %bb.1711: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1714
+; X86-NEXT:  .LBB5_1713:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1714: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_1716
+; X86-NEXT:  # %bb.1715:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1716: # %udiv-preheader
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_1718
+; X86-NEXT:  # %bb.1717: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1718: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1720
+; X86-NEXT:  # %bb.1719:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1720: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1722
+; X86-NEXT:  # %bb.1721: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_1722: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    je .LBB5_1724
+; X86-NEXT:  # %bb.1723: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_1724: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1725
+; X86-NEXT:  # %bb.1726: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1727
+; X86-NEXT:  .LBB5_1728: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1730
+; X86-NEXT:  .LBB5_1729: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_1730: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_1732
+; X86-NEXT:  # %bb.1731:
+; X86-NEXT:    orl %ecx, %ebx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_1732: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    je .LBB5_1734
+; X86-NEXT:  # %bb.1733: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_1734: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1736
+; X86-NEXT:  # %bb.1735:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1736: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_1738
+; X86-NEXT:  # %bb.1737: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_1738: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB5_1740
+; X86-NEXT:  # %bb.1739: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1740: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1741
+; X86-NEXT:  # %bb.1742: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_1743
+; X86-NEXT:  .LBB5_1744: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_1745
+; X86-NEXT:  .LBB5_1746: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %edi
+; X86-NEXT:    jae .LBB5_1747
+; X86-NEXT:  .LBB5_1748: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    jb .LBB5_1750
+; X86-NEXT:  .LBB5_1749: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_1750: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_1752
+; X86-NEXT:  # %bb.1751: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_1752: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1754
+; X86-NEXT:  # %bb.1753: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1754: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1756
+; X86-NEXT:  # %bb.1755: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1756: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_1758
+; X86-NEXT:  # %bb.1757: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_1758: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1760
+; X86-NEXT:  # %bb.1759: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1760: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1762
+; X86-NEXT:  # %bb.1761: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1762: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1763
+; X86-NEXT:  # %bb.1764: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_1765
+; X86-NEXT:  .LBB5_1690: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1693
+; X86-NEXT:  .LBB5_1692: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1695
+; X86-NEXT:  .LBB5_1694: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1696
+; X86-NEXT:    jmp .LBB5_1697
+; X86-NEXT:  .LBB5_1710:
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1713
+; X86-NEXT:    jmp .LBB5_1714
+; X86-NEXT:  .LBB5_1725: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1728
+; X86-NEXT:  .LBB5_1727: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1729
+; X86-NEXT:    jmp .LBB5_1730
+; X86-NEXT:  .LBB5_1741: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB5_1744
+; X86-NEXT:  .LBB5_1743: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_1746
+; X86-NEXT:  .LBB5_1745: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %edi
+; X86-NEXT:    jb .LBB5_1748
+; X86-NEXT:  .LBB5_1747: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    jae .LBB5_1749
+; X86-NEXT:    jmp .LBB5_1750
+; X86-NEXT:  .LBB5_1763:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB5_1765: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %esi
+; X86-NEXT:    jae .LBB5_1767
+; X86-NEXT:  # %bb.1766:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1767: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1769
+; X86-NEXT:  # %bb.1768: # %udiv-preheader
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_1769: # %udiv-preheader
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    je .LBB5_1771
+; X86-NEXT:  # %bb.1770: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_1771: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1773
+; X86-NEXT:  # %bb.1772:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:  .LBB5_1773: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1775
+; X86-NEXT:  # %bb.1774: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_1775: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1777
+; X86-NEXT:  # %bb.1776:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:  .LBB5_1777: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1779
+; X86-NEXT:  # %bb.1778: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1779: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_1781
+; X86-NEXT:  # %bb.1780:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1781: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $256, %edx # imm = 0x100
+; X86-NEXT:    jae .LBB5_1783
+; X86-NEXT:  # %bb.1782:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1783: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1785
+; X86-NEXT:  # %bb.1784: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1785: # %udiv-preheader
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    je .LBB5_1787
+; X86-NEXT:  # %bb.1786: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1787: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_1789
+; X86-NEXT:  # %bb.1788:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_1789: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_1791
+; X86-NEXT:  # %bb.1790: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1791: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1793
+; X86-NEXT:  # %bb.1792: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1793: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1795
+; X86-NEXT:  # %bb.1794: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1795: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1797
+; X86-NEXT:  # %bb.1796: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1797: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    shldl %cl, %ebp, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1799
+; X86-NEXT:  # %bb.1798: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1799: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1801
+; X86-NEXT:  # %bb.1800: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1801: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_1803
+; X86-NEXT:  # %bb.1802: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1803: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jb .LBB5_1805
+; X86-NEXT:  # %bb.1804: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1805: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1807
+; X86-NEXT:  # %bb.1806: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1807: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1809
+; X86-NEXT:  # %bb.1808: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1809: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1811
+; X86-NEXT:  # %bb.1810: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1811: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %esi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1813
+; X86-NEXT:  # %bb.1812: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1813: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1814
+; X86-NEXT:  # %bb.1815: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1818
+; X86-NEXT:  .LBB5_1817:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1818: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1820
+; X86-NEXT:  # %bb.1819:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1820: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1822
+; X86-NEXT:  # %bb.1821: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1822: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1824
+; X86-NEXT:  # %bb.1823:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1824: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    je .LBB5_1826
+; X86-NEXT:  # %bb.1825: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1826: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    je .LBB5_1828
+; X86-NEXT:  # %bb.1827: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_1828: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    je .LBB5_1830
+; X86-NEXT:  # %bb.1829: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1830: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1832
+; X86-NEXT:  # %bb.1831: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1832: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1834
+; X86-NEXT:  # %bb.1833: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1834: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1836
+; X86-NEXT:  # %bb.1835:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB5_1836: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1838
+; X86-NEXT:  # %bb.1837: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:  .LBB5_1838: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1840
+; X86-NEXT:  # %bb.1839:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_1840: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $256, %edi # imm = 0x100
+; X86-NEXT:    jae .LBB5_1842
+; X86-NEXT:  # %bb.1841:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1842: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    je .LBB5_1844
+; X86-NEXT:  # %bb.1843: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_1844: # %udiv-preheader
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    je .LBB5_1846
+; X86-NEXT:  # %bb.1845: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1846: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1848
+; X86-NEXT:  # %bb.1847: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_1848: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_1850
+; X86-NEXT:  # %bb.1849:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_1850: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB5_1852
+; X86-NEXT:  # %bb.1851: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_1852: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1854
+; X86-NEXT:  # %bb.1853: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1854: # %udiv-preheader
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1856
+; X86-NEXT:  # %bb.1855: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1856: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1858
+; X86-NEXT:  # %bb.1857: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1858: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1860
+; X86-NEXT:  # %bb.1859: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1860: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_1862
+; X86-NEXT:  # %bb.1861: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1862: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    jne .LBB5_1864
+; X86-NEXT:  # %bb.1863: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB5_1864: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1866
+; X86-NEXT:  # %bb.1865: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1866: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1868
+; X86-NEXT:  # %bb.1867: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1868: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1870
+; X86-NEXT:  # %bb.1869: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1870: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1872
+; X86-NEXT:  # %bb.1871: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1872: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_1873
+; X86-NEXT:  # %bb.1874: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1875
+; X86-NEXT:  .LBB5_1876: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1878
+; X86-NEXT:  .LBB5_1877:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1878: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1880
+; X86-NEXT:  # %bb.1879: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1880: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_1882
+; X86-NEXT:  # %bb.1881:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_1882: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1884
+; X86-NEXT:  # %bb.1883: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1884: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_1886
+; X86-NEXT:  # %bb.1885:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1886: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1888
+; X86-NEXT:  # %bb.1887: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1888: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    je .LBB5_1890
+; X86-NEXT:  # %bb.1889: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1890: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    je .LBB5_1892
+; X86-NEXT:  # %bb.1891: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_1892: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1894
+; X86-NEXT:  # %bb.1893: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1894: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1896
+; X86-NEXT:  # %bb.1895: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:  .LBB5_1896: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1898
+; X86-NEXT:  # %bb.1897: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1898: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1899
+; X86-NEXT:  # %bb.1900: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1901
+; X86-NEXT:  .LBB5_1902: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1903
+; X86-NEXT:  .LBB5_1904: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1905
+; X86-NEXT:  .LBB5_1906: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1907
+; X86-NEXT:  .LBB5_1908: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1910
+; X86-NEXT:  .LBB5_1909:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1910: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_1912
+; X86-NEXT:  # %bb.1911: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_1912: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    je .LBB5_1914
+; X86-NEXT:  # %bb.1913: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_1914: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1916
+; X86-NEXT:  # %bb.1915:
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_1916: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1918
+; X86-NEXT:  # %bb.1917: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_1918: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1919
+; X86-NEXT:  # %bb.1920: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_1921
+; X86-NEXT:  .LBB5_1922: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_1924
+; X86-NEXT:  .LBB5_1923:
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB5_1924: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1926
+; X86-NEXT:  # %bb.1925: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1926: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1928
+; X86-NEXT:  # %bb.1927: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_1928: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1930
+; X86-NEXT:  # %bb.1929: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1930: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1932
+; X86-NEXT:  # %bb.1931: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1932: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jne .LBB5_1934
+; X86-NEXT:  # %bb.1933: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1934: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1936
+; X86-NEXT:  # %bb.1935: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1936: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_1938
+; X86-NEXT:  # %bb.1937: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1938: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1940
+; X86-NEXT:  # %bb.1939: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1940: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1942
+; X86-NEXT:  # %bb.1941: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_1942: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1944
+; X86-NEXT:  # %bb.1943: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1944: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1946
+; X86-NEXT:  # %bb.1945: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1946: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1948
+; X86-NEXT:  # %bb.1947: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_1948: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    je .LBB5_1949
+; X86-NEXT:  # %bb.1950: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1951
+; X86-NEXT:  .LBB5_1952: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_1953
+; X86-NEXT:  .LBB5_1954: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB5_1956
+; X86-NEXT:  .LBB5_1955: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1956: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1958
+; X86-NEXT:  # %bb.1957: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_1958: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %esi
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1960
+; X86-NEXT:  # %bb.1959: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_1960: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1962
+; X86-NEXT:  # %bb.1961: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:  .LBB5_1962: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %esi
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jne .LBB5_1964
+; X86-NEXT:  # %bb.1963: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_1964: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_1965
+; X86-NEXT:  # %bb.1966: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, %esi
+; X86-NEXT:    jae .LBB5_1969
+; X86-NEXT:  .LBB5_1968:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1969: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_1971
+; X86-NEXT:  # %bb.1970:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1971: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_1973
+; X86-NEXT:  # %bb.1972: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB5_1973: # %udiv-preheader
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_1975
+; X86-NEXT:  # %bb.1974: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_1975: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_1977
+; X86-NEXT:  # %bb.1976: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_1977: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jae .LBB5_1979
+; X86-NEXT:  # %bb.1978:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:  .LBB5_1979: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1980
+; X86-NEXT:  # %bb.1981: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB5_1982
+; X86-NEXT:  .LBB5_1983: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1985
+; X86-NEXT:  .LBB5_1984:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1985: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_1987
+; X86-NEXT:  # %bb.1986: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_1987: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1989
+; X86-NEXT:  # %bb.1988:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_1989: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1991
+; X86-NEXT:  # %bb.1990: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_1991: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_1993
+; X86-NEXT:  # %bb.1992: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB5_1993: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_1995
+; X86-NEXT:  # %bb.1994: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_1995: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_1996
+; X86-NEXT:  # %bb.1997: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_1998
+; X86-NEXT:  .LBB5_1999: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_2001
+; X86-NEXT:  .LBB5_2000:
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_2001: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2003
+; X86-NEXT:  # %bb.2002: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2003: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_2005
+; X86-NEXT:  # %bb.2004: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2005: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_2007
+; X86-NEXT:  # %bb.2006:
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2007: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2009
+; X86-NEXT:  # %bb.2008: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2009: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jne .LBB5_2011
+; X86-NEXT:  # %bb.2010: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2011: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2013
+; X86-NEXT:  # %bb.2012: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2013: # %udiv-preheader
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2015
+; X86-NEXT:  # %bb.2014: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2015: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2017
+; X86-NEXT:  # %bb.2016: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2017: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2019
+; X86-NEXT:  # %bb.2018: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2019: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2021
+; X86-NEXT:  # %bb.2020: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2021: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2023
+; X86-NEXT:  # %bb.2022: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2023: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2025
+; X86-NEXT:  # %bb.2024: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2025: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2027
+; X86-NEXT:  # %bb.2026:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2027: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2029
+; X86-NEXT:  # %bb.2028:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2029: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2030
+; X86-NEXT:  # %bb.2031: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_2032
+; X86-NEXT:  .LBB5_1814:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1817
+; X86-NEXT:    jmp .LBB5_1818
+; X86-NEXT:  .LBB5_1873: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1876
+; X86-NEXT:  .LBB5_1875:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1877
+; X86-NEXT:    jmp .LBB5_1878
+; X86-NEXT:  .LBB5_1899: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1902
+; X86-NEXT:  .LBB5_1901: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1904
+; X86-NEXT:  .LBB5_1903: # %udiv-preheader
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1906
+; X86-NEXT:  .LBB5_1905: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_1908
+; X86-NEXT:  .LBB5_1907:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1909
+; X86-NEXT:    jmp .LBB5_1910
+; X86-NEXT:  .LBB5_1919: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_1922
+; X86-NEXT:  .LBB5_1921: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jb .LBB5_1923
+; X86-NEXT:    jmp .LBB5_1924
+; X86-NEXT:  .LBB5_1949: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1952
+; X86-NEXT:  .LBB5_1951: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_1954
+; X86-NEXT:  .LBB5_1953: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_1955
+; X86-NEXT:    jmp .LBB5_1956
+; X86-NEXT:  .LBB5_1965:
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %esi
+; X86-NEXT:    jb .LBB5_1968
+; X86-NEXT:    jmp .LBB5_1969
+; X86-NEXT:  .LBB5_1980: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je .LBB5_1983
+; X86-NEXT:  .LBB5_1982: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_1984
+; X86-NEXT:    jmp .LBB5_1985
+; X86-NEXT:  .LBB5_1996: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_1999
+; X86-NEXT:  .LBB5_1998: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2000
+; X86-NEXT:    jmp .LBB5_2001
+; X86-NEXT:  .LBB5_2030:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_2032: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    je .LBB5_2034
+; X86-NEXT:  # %bb.2033: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2034: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2036
+; X86-NEXT:  # %bb.2035: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2036: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2037
+; X86-NEXT:  # %bb.2038: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2039
+; X86-NEXT:  .LBB5_2040: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2042
+; X86-NEXT:  .LBB5_2041:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2042: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2043
+; X86-NEXT:  # %bb.2044: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2045
+; X86-NEXT:  .LBB5_2046: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_2047
+; X86-NEXT:  .LBB5_2048: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2049
+; X86-NEXT:  .LBB5_2050: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_2051
+; X86-NEXT:  .LBB5_2037: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2040
+; X86-NEXT:  .LBB5_2039: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2041
+; X86-NEXT:    jmp .LBB5_2042
+; X86-NEXT:  .LBB5_2043: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2046
+; X86-NEXT:  .LBB5_2045:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_2048
+; X86-NEXT:  .LBB5_2047: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_2050
+; X86-NEXT:  .LBB5_2049:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2051: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2053
+; X86-NEXT:  # %bb.2052: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2053: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2055
+; X86-NEXT:  # %bb.2054: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2055: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2057
+; X86-NEXT:  # %bb.2056: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2057: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2059
+; X86-NEXT:  # %bb.2058: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2059: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_2061
+; X86-NEXT:  # %bb.2060:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2061: # %udiv-preheader
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2062
+; X86-NEXT:  # %bb.2063: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2064
+; X86-NEXT:  .LBB5_2065: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2067
+; X86-NEXT:  .LBB5_2066:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2067: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    je .LBB5_2069
+; X86-NEXT:  # %bb.2068: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2069: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2071
+; X86-NEXT:  # %bb.2070: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2071: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2073
+; X86-NEXT:  # %bb.2072: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2073: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2074
+; X86-NEXT:  # %bb.2075: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2076
+; X86-NEXT:  .LBB5_2077: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2079
+; X86-NEXT:  .LBB5_2078:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2079: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2081
+; X86-NEXT:  # %bb.2080: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2081: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2082
+; X86-NEXT:  # %bb.2083: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_2086
+; X86-NEXT:  .LBB5_2085:
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_2086: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    je .LBB5_2088
+; X86-NEXT:  # %bb.2087: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_2088: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    je .LBB5_2090
+; X86-NEXT:  # %bb.2089: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_2090: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2092
+; X86-NEXT:  # %bb.2091: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2092: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2094
+; X86-NEXT:  # %bb.2093:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2094: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2096
+; X86-NEXT:  # %bb.2095:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2096: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2098
+; X86-NEXT:  # %bb.2097: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2098: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2100
+; X86-NEXT:  # %bb.2099: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_2100: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    je .LBB5_2102
+; X86-NEXT:  # %bb.2101: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2102: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jb .LBB5_2103
+; X86-NEXT:  # %bb.2104: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_2105
+; X86-NEXT:  .LBB5_2062: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2065
+; X86-NEXT:  .LBB5_2064:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2066
+; X86-NEXT:    jmp .LBB5_2067
+; X86-NEXT:  .LBB5_2074: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2077
+; X86-NEXT:  .LBB5_2076: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2078
+; X86-NEXT:    jmp .LBB5_2079
+; X86-NEXT:  .LBB5_2082:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2085
+; X86-NEXT:    jmp .LBB5_2086
+; X86-NEXT:  .LBB5_2103:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2105: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2107
+; X86-NEXT:  # %bb.2106: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2107: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    jne .LBB5_2109
+; X86-NEXT:  # %bb.2108: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2109: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2111
+; X86-NEXT:  # %bb.2110: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2111: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2113
+; X86-NEXT:  # %bb.2112: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2113: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2115
+; X86-NEXT:  # %bb.2114: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2115: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2117
+; X86-NEXT:  # %bb.2116: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2117: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2119
+; X86-NEXT:  # %bb.2118: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2119: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %eax
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    jne .LBB5_2121
+; X86-NEXT:  # %bb.2120: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2121: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2123
+; X86-NEXT:  # %bb.2122: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2123: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_2125
+; X86-NEXT:  # %bb.2124: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2125: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2127
+; X86-NEXT:  # %bb.2126: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2127: # %udiv-preheader
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_2129
+; X86-NEXT:  # %bb.2128: # %udiv-preheader
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_2129: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    je .LBB5_2130
+; X86-NEXT:  # %bb.2131: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_2132
+; X86-NEXT:  .LBB5_2133: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2134
+; X86-NEXT:  .LBB5_2135: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_2137
+; X86-NEXT:  .LBB5_2136: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2137: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2139
+; X86-NEXT:  # %bb.2138: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2139: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2141
+; X86-NEXT:  # %bb.2140: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_2141: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2143
+; X86-NEXT:  # %bb.2142: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2143: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edx
+; X86-NEXT:    shrl %cl, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2145
+; X86-NEXT:  # %bb.2144: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:  .LBB5_2145: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB5_2147
+; X86-NEXT:  # %bb.2146: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_2147: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_2149
+; X86-NEXT:  # %bb.2148:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2149: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2151
+; X86-NEXT:  # %bb.2150:
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2151: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2153
+; X86-NEXT:  # %bb.2152:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %ebp, %edi
+; X86-NEXT:  .LBB5_2153: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2155
+; X86-NEXT:  # %bb.2154:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2155: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2157
+; X86-NEXT:  # %bb.2156: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2157: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_2159
+; X86-NEXT:  # %bb.2158: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2159: # %udiv-preheader
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_2161
+; X86-NEXT:  # %bb.2160: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_2161: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2162
+; X86-NEXT:  # %bb.2163: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_2164
+; X86-NEXT:  .LBB5_2165: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    jae .LBB5_2167
+; X86-NEXT:  .LBB5_2166:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB5_2167: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2169
+; X86-NEXT:  # %bb.2168: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2169: # %udiv-preheader
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2171
+; X86-NEXT:  # %bb.2170: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_2171: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ecx # imm = 0x100
+; X86-NEXT:    jae .LBB5_2173
+; X86-NEXT:  # %bb.2172:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2173: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2174
+; X86-NEXT:  # %bb.2175: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jb .LBB5_2176
+; X86-NEXT:  .LBB5_2177: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2179
+; X86-NEXT:  .LBB5_2178: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2179: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2181
+; X86-NEXT:  # %bb.2180: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2181: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_2183
+; X86-NEXT:  # %bb.2182: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_2183: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2185
+; X86-NEXT:  # %bb.2184: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2185: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2187
+; X86-NEXT:  # %bb.2186: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2187: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2189
+; X86-NEXT:  # %bb.2188: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2189: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2191
+; X86-NEXT:  # %bb.2190: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2191: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2193
+; X86-NEXT:  # %bb.2192: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2193: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_2195
+; X86-NEXT:  # %bb.2194: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2195: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2197
+; X86-NEXT:  # %bb.2196: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2197: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2199
+; X86-NEXT:  # %bb.2198: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2199: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2201
+; X86-NEXT:  # %bb.2200:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2201: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2203
+; X86-NEXT:  # %bb.2202:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2203: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2205
+; X86-NEXT:  # %bb.2204:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2205: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2207
+; X86-NEXT:  # %bb.2206: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2207: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2209
+; X86-NEXT:  # %bb.2208: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2209: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2211
+; X86-NEXT:  # %bb.2210: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2211: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2213
+; X86-NEXT:  # %bb.2212: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2213: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    je .LBB5_2215
+; X86-NEXT:  # %bb.2214: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2215: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2217
+; X86-NEXT:  # %bb.2216:
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2217: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    je .LBB5_2219
+; X86-NEXT:  # %bb.2218: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2219: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2221
+; X86-NEXT:  # %bb.2220:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_2221: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2223
+; X86-NEXT:  # %bb.2222:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB5_2223: # %udiv-preheader
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    je .LBB5_2225
+; X86-NEXT:  # %bb.2224: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB5_2225: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2227
+; X86-NEXT:  # %bb.2226: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2227: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    jae .LBB5_2229
+; X86-NEXT:  # %bb.2228:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_2229: # %udiv-preheader
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    je .LBB5_2231
+; X86-NEXT:  # %bb.2230: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2231: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_2233
+; X86-NEXT:  # %bb.2232:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2233: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2235
+; X86-NEXT:  # %bb.2234: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2235: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2237
+; X86-NEXT:  # %bb.2236: # %udiv-preheader
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB5_2237: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    je .LBB5_2238
+; X86-NEXT:  # %bb.2239: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2240
+; X86-NEXT:  .LBB5_2241: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2242
+; X86-NEXT:  .LBB5_2243: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2245
+; X86-NEXT:  .LBB5_2244: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_2245: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2247
+; X86-NEXT:  # %bb.2246: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2247: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    je .LBB5_2249
+; X86-NEXT:  # %bb.2248: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2249: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_2251
+; X86-NEXT:  # %bb.2250: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_2251: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2253
+; X86-NEXT:  # %bb.2252:
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:  .LBB5_2253: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_2255
+; X86-NEXT:  # %bb.2254: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2255: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2257
+; X86-NEXT:  # %bb.2256:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB5_2257: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB5_2259
+; X86-NEXT:  # %bb.2258: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB5_2259: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2261
+; X86-NEXT:  # %bb.2260: # %udiv-preheader
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2261: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_2263
+; X86-NEXT:  # %bb.2262:
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:  .LBB5_2263: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    je .LBB5_2265
+; X86-NEXT:  # %bb.2264: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2265: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_2267
+; X86-NEXT:  # %bb.2266: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:  .LBB5_2267: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jb .LBB5_2268
+; X86-NEXT:  # %bb.2269: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_2270
+; X86-NEXT:  .LBB5_2130: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB5_2133
+; X86-NEXT:  .LBB5_2132: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2135
+; X86-NEXT:  .LBB5_2134: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_2136
+; X86-NEXT:    jmp .LBB5_2137
+; X86-NEXT:  .LBB5_2162: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_2165
+; X86-NEXT:  .LBB5_2164:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    jb .LBB5_2166
+; X86-NEXT:    jmp .LBB5_2167
+; X86-NEXT:  .LBB5_2174: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_2177
+; X86-NEXT:  .LBB5_2176:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_2178
+; X86-NEXT:    jmp .LBB5_2179
+; X86-NEXT:  .LBB5_2238: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2241
+; X86-NEXT:  .LBB5_2240: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2243
+; X86-NEXT:  .LBB5_2242: # %udiv-preheader
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2244
+; X86-NEXT:    jmp .LBB5_2245
+; X86-NEXT:  .LBB5_2268:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2270: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB5_2271
+; X86-NEXT:  # %bb.2272: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jae .LBB5_2273
+; X86-NEXT:  .LBB5_2274: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jae .LBB5_2275
+; X86-NEXT:  .LBB5_2276: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %ecx # imm = 0x100
+; X86-NEXT:    jb .LBB5_2278
+; X86-NEXT:  .LBB5_2277: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB5_2278: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2280
+; X86-NEXT:  # %bb.2279: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2280: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_2282
+; X86-NEXT:  # %bb.2281: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_2282: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2284
+; X86-NEXT:  # %bb.2283: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2284: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2286
+; X86-NEXT:  # %bb.2285: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2286: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    je .LBB5_2288
+; X86-NEXT:  # %bb.2287: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2288: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2290
+; X86-NEXT:  # %bb.2289: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2290: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2292
+; X86-NEXT:  # %bb.2291: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2292: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    je .LBB5_2294
+; X86-NEXT:  # %bb.2293: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_2294: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2295
+; X86-NEXT:  # %bb.2296: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2297
+; X86-NEXT:  .LBB5_2298: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2300
+; X86-NEXT:  .LBB5_2299:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:  .LBB5_2300: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2302
+; X86-NEXT:  # %bb.2301: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2302: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2304
+; X86-NEXT:  # %bb.2303: # %udiv-preheader
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB5_2304: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_2306
+; X86-NEXT:  # %bb.2305:
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_2306: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2308
+; X86-NEXT:  # %bb.2307: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2308: # %udiv-preheader
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2310
+; X86-NEXT:  # %bb.2309: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB5_2310: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    jae .LBB5_2312
+; X86-NEXT:  # %bb.2311:
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:  .LBB5_2312: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2314
+; X86-NEXT:  # %bb.2313: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2314: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB5_2316
+; X86-NEXT:  # %bb.2315: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2316: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2318
+; X86-NEXT:  # %bb.2317: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2318: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB5_2320
+; X86-NEXT:  # %bb.2319: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:  .LBB5_2320: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2322
+; X86-NEXT:  # %bb.2321: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2322: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2324
+; X86-NEXT:  # %bb.2323: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2324: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2326
+; X86-NEXT:  # %bb.2325:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2326: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    je .LBB5_2328
+; X86-NEXT:  # %bb.2327: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2328: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_2330
+; X86-NEXT:  # %bb.2329: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_2330: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2332
+; X86-NEXT:  # %bb.2331: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2332: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2334
+; X86-NEXT:  # %bb.2333: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2334: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2336
+; X86-NEXT:  # %bb.2335: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2336: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2338
+; X86-NEXT:  # %bb.2337: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2338: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2340
+; X86-NEXT:  # %bb.2339: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2340: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2342
+; X86-NEXT:  # %bb.2341: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2342: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2344
+; X86-NEXT:  # %bb.2343: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_2344: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2345
+; X86-NEXT:  # %bb.2346: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2349
+; X86-NEXT:  .LBB5_2348:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2349: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2351
+; X86-NEXT:  # %bb.2350:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2351: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2353
+; X86-NEXT:  # %bb.2352: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_2353: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2355
+; X86-NEXT:  # %bb.2354: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2355: # %udiv-preheader
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    jne .LBB5_2356
+; X86-NEXT:  # %bb.2357: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2358
+; X86-NEXT:  .LBB5_2359: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2360
+; X86-NEXT:  .LBB5_2361: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2363
+; X86-NEXT:  .LBB5_2362: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_2363: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    je .LBB5_2365
+; X86-NEXT:  # %bb.2364: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2365: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    je .LBB5_2367
+; X86-NEXT:  # %bb.2366: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2367: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2369
+; X86-NEXT:  # %bb.2368:
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2369: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2371
+; X86-NEXT:  # %bb.2370: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2371: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    je .LBB5_2373
+; X86-NEXT:  # %bb.2372: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB5_2373: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2375
+; X86-NEXT:  # %bb.2374:
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB5_2375: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB5_2376
+; X86-NEXT:  # %bb.2377: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2378
+; X86-NEXT:  .LBB5_2379: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2380
+; X86-NEXT:  .LBB5_2381: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jae .LBB5_2382
+; X86-NEXT:  .LBB5_2383: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2384
+; X86-NEXT:  .LBB5_2385: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2386
+; X86-NEXT:  .LBB5_2387: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jb .LBB5_2389
+; X86-NEXT:  .LBB5_2388: # %udiv-preheader
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB5_2389: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jae .LBB5_2390
+; X86-NEXT:  # %bb.2391: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2392
+; X86-NEXT:  .LBB5_2393: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jae .LBB5_2394
+; X86-NEXT:  .LBB5_2395: # %udiv-preheader
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jb .LBB5_2397
+; X86-NEXT:  .LBB5_2396: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2397: # %udiv-preheader
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2399
+; X86-NEXT:  # %bb.2398: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2399: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2401
+; X86-NEXT:  # %bb.2400: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2401: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2403
+; X86-NEXT:  # %bb.2402: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2403: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2405
+; X86-NEXT:  # %bb.2404: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2405: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2407
+; X86-NEXT:  # %bb.2406: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2407: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    jne .LBB5_2409
+; X86-NEXT:  # %bb.2408: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2409: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2411
+; X86-NEXT:  # %bb.2410: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2411: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2413
+; X86-NEXT:  # %bb.2412: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2413: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB5_2415
+; X86-NEXT:  # %bb.2414: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2415: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2417
+; X86-NEXT:  # %bb.2416: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2417: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB5_2419
+; X86-NEXT:  # %bb.2418: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2419: # %udiv-preheader
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2421
+; X86-NEXT:  # %bb.2420: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2421: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB5_2423
+; X86-NEXT:  # %bb.2422: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB5_2423: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2425
+; X86-NEXT:  # %bb.2424: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2425: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2427
+; X86-NEXT:  # %bb.2426:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2427: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2429
+; X86-NEXT:  # %bb.2428:
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:  .LBB5_2429: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_2431
+; X86-NEXT:  # %bb.2430:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2431: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2433
+; X86-NEXT:  # %bb.2432: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2433: # %udiv-preheader
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2435
+; X86-NEXT:  # %bb.2434: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2435: # %udiv-preheader
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2437
+; X86-NEXT:  # %bb.2436: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2437: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_2439
+; X86-NEXT:  # %bb.2438: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB5_2439: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    je .LBB5_2441
+; X86-NEXT:  # %bb.2440: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB5_2441: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2443
+; X86-NEXT:  # %bb.2442: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2443: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2445
+; X86-NEXT:  # %bb.2444: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2445: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2447
+; X86-NEXT:  # %bb.2446: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2447: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB5_2449
+; X86-NEXT:  # %bb.2448: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2449: # %udiv-preheader
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2450
+; X86-NEXT:  # %bb.2451: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jmp .LBB5_2452
+; X86-NEXT:  .LBB5_2271: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    cmpl $64, %ecx
+; X86-NEXT:    jb .LBB5_2274
+; X86-NEXT:  .LBB5_2273: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    cmpl $128, %ecx
+; X86-NEXT:    jb .LBB5_2276
+; X86-NEXT:  .LBB5_2275: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    cmpl $256, %ecx # imm = 0x100
+; X86-NEXT:    jae .LBB5_2277
+; X86-NEXT:    jmp .LBB5_2278
+; X86-NEXT:  .LBB5_2295: # %udiv-preheader
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2298
+; X86-NEXT:  .LBB5_2297:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2299
+; X86-NEXT:    jmp .LBB5_2300
+; X86-NEXT:  .LBB5_2345:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2348
+; X86-NEXT:    jmp .LBB5_2349
+; X86-NEXT:  .LBB5_2356: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2359
+; X86-NEXT:  .LBB5_2358:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2361
+; X86-NEXT:  .LBB5_2360:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2362
+; X86-NEXT:    jmp .LBB5_2363
+; X86-NEXT:  .LBB5_2376: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jb .LBB5_2379
+; X86-NEXT:  .LBB5_2378: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2381
+; X86-NEXT:  .LBB5_2380: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %edx
+; X86-NEXT:    jb .LBB5_2383
+; X86-NEXT:  .LBB5_2382: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2385
+; X86-NEXT:  .LBB5_2384: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2387
+; X86-NEXT:  .LBB5_2386: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $64, %ebx
+; X86-NEXT:    jae .LBB5_2388
+; X86-NEXT:    jmp .LBB5_2389
+; X86-NEXT:  .LBB5_2390: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jb .LBB5_2393
+; X86-NEXT:  .LBB5_2392: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %edx
+; X86-NEXT:    jb .LBB5_2395
+; X86-NEXT:  .LBB5_2394: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpl $128, %eax
+; X86-NEXT:    jae .LBB5_2396
+; X86-NEXT:    jmp .LBB5_2397
+; X86-NEXT:  .LBB5_2450:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_2452: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2454
+; X86-NEXT:  # %bb.2453: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB5_2454: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2456
+; X86-NEXT:  # %bb.2455:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB5_2456: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2458
+; X86-NEXT:  # %bb.2457:
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB5_2458: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2460
+; X86-NEXT:  # %bb.2459: # %udiv-preheader
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:  .LBB5_2460: # %udiv-preheader
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2462
+; X86-NEXT:  # %bb.2461:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2462: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmpl $64, %eax
+; X86-NEXT:    jae .LBB5_2464
+; X86-NEXT:  # %bb.2463:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2464: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    je .LBB5_2466
+; X86-NEXT:  # %bb.2465: # %udiv-preheader
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB5_2466: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_2468
+; X86-NEXT:  # %bb.2467:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2468: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB5_2469
+; X86-NEXT:  # %bb.2470: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB5_2471
+; X86-NEXT:  .LBB5_2472: # %udiv-preheader
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jb .LBB5_2474
+; X86-NEXT:  .LBB5_2473: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_2474: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB5_2476
+; X86-NEXT:  # %bb.2475: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB5_2476: # %udiv-preheader
+; X86-NEXT:    cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    jae .LBB5_2478
+; X86-NEXT:  # %bb.2477:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB5_2478: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_2480
+; X86-NEXT:  # %bb.2479: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2480: # %udiv-preheader
+; X86-NEXT:    cmpl $256, %edx # imm = 0x100
+; X86-NEXT:    jae .LBB5_2482
+; X86-NEXT:  # %bb.2481:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2482: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    je .LBB5_2484
+; X86-NEXT:  # %bb.2483: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB5_2484: # %udiv-preheader
+; X86-NEXT:    cmpl $512, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x200
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jb .LBB5_2485
+; X86-NEXT:  # %bb.2486: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB5_2487
+; X86-NEXT:  .LBB5_2469: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    je .LBB5_2472
+; X86-NEXT:  .LBB5_2471: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    cmpl $256, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    # imm = 0x100
+; X86-NEXT:    jae .LBB5_2473
+; X86-NEXT:    jmp .LBB5_2474
+; X86-NEXT:  .LBB5_2485:
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2487: # %udiv-preheader
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB5_2489
+; X86-NEXT:  # %bb.2488: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB5_2489: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    cmpl $128, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2491
+; X86-NEXT:  # %bb.2490: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB5_2491: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jb .LBB5_2493
+; X86-NEXT:  # %bb.2492: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2493: # %udiv-preheader
+; X86-NEXT:    cmpl $512, %ebx # imm = 0x200
+; X86-NEXT:    jb .LBB5_2495
+; X86-NEXT:  # %bb.2494: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB5_2495: # %udiv-preheader
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $511, %ebx # imm = 0x1FF
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB5_172: # %udiv-do-while
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    leal (%eax,%ebx,2), %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebp
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %edx, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edx
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edx
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebp
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebp
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %esi, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %esi
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    cmpl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl (%esp), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    shll $23, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    subl %esi, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    addl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edx
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebp
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edi
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    adcl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $511, %eax # imm = 0x1FF
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $511, %eax # imm = 0x1FF
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB5_172
+; X86-NEXT:  .LBB5_173: # %udiv-loop-exit
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    orl %ebp, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %edx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edx
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %edi
+; X86-NEXT:    orl %ebp, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %esi, %ecx
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %esi
+; X86-NEXT:    orl %ebp, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ecx
+; X86-NEXT:    orl %ebp, %ecx
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %eax
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebp
+; X86-NEXT:    orl %edx, %ebp
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
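+;
+; The shldl/orl cascade above is the 32-limb "(q << 1) | r" computed by the
+; %udiv-loop-exit block. A minimal C sketch of what each step does, with
+; illustrative names (little-endian uint32_t limbs, q[0] least significant);
+; this is a reader's aid, not part of the checked output:
+;
+;   static void shl1_or(uint32_t q[32], const uint32_t r[32]) {
+;     for (int i = 31; i > 0; --i)
+;       q[i] = ((q[i] << 1) | (q[i - 1] >> 31)) | r[i]; // shldl $1 + orl
+;     q[0] = (q[0] << 1) | r[0];                        // addl %eax,%eax + orl
+;   }
+;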
+; X86-NEXT:  .LBB5_4798: # %udiv-end
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
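+;
+; The mull/addl/adcl/setb sequence above (and repeated throughout %udiv-end
+; below) is one 64x64->128 schoolbook multiply step built from four widening
+; 32x32 mulls. Illustrative C sketch with hypothetical names, again only a
+; reader's aid:
+;
+;   static void mul64x64_128(uint32_t a0, uint32_t a1,
+;                            uint32_t b0, uint32_t b1, uint32_t r[4]) {
+;     uint64_t p00 = (uint64_t)a0 * b0, p10 = (uint64_t)a1 * b0;
+;     uint64_t p01 = (uint64_t)a0 * b1, p11 = (uint64_t)a1 * b1;
+;     r[0] = (uint32_t)p00;
+;     uint64_t mid = (p00 >> 32) + (uint32_t)p10 + (uint32_t)p01;
+;     r[1] = (uint32_t)mid;                          // addl + adcl chain
+;     uint64_t hi = (p10 >> 32) + (p01 >> 32) + (uint32_t)p11 + (mid >> 32);
+;     r[2] = (uint32_t)hi;                           // setb/movzbl carry
+;     r[3] = (uint32_t)(p11 >> 32) + (uint32_t)(hi >> 32);
+;   }
+;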
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ebx, %edi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebx, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ebx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    mull %edi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebp, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    adcl %edi, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ebp, %edi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %ebx, %esi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movzbl %bl, %esi
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    addl %edi, %ebx
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    addl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebx, %esi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movzbl (%esp), %esi # 1-byte Folded Reload
+; X86-NEXT:    adcl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl (%esp), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebx, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ebx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebp, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    adcl %edi, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ebp, %edi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %ebx, %esi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movzbl %bl, %esi
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    addl %edi, %ebx
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    addl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %esi, %ebp
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebx, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT:    addl %ebx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ebp
+; X86-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    adcl %ebp, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %edi, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebp, %ecx
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    adcl %ebp, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %edi, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ebp, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ebx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebp, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebp, %esi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %ebx, %edi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl %bl, %edi
+; X86-NEXT:    adcl %edi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebx, %edi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %edi, %ecx
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    addl %ebx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ebp
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl (%esp), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ebx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebx, %esi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebp, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebp, %esi
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ebp, %ecx
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ebx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebp, %ecx
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    adcl %ebp, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %edi, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    addl %ebx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    addl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %ebx, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    addl %esi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ebx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %esi, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl (%esp), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl (%esp), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebp, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebp, %esi
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ebx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebx, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ebx, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    addl %ebp, %edi
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %edi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ebx, %edi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %ebx, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    addl %edi, %ecx
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %edi, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebp, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %esi, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    imull %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    addl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    imull %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %ebp, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %edi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl %bl, %ecx
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    imull %ebp, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %ebx, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    adcl %ebp, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebp, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    imull %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    addl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    imull %ebp, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %ebx, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %edi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl %bl, %ecx
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %ebp, %ecx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    imull %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %edi, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    adcl %ebp, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebx, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %esi, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl (%esp), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb (%esp) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl (%esp), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebp, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebp, %esi
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ebx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ebp, %edi
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebx, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    adcl %ebp, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ebx, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    addl %ebp, %edi
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %esi, %ecx
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %esi, %ebp
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %edi, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ebx, %edi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    adcl %ebx, %esi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %esi, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl %edi, %ecx
+; X86-NEXT:    adcl (%esp), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ebp, %edi
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl %ebp, %ecx
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    adcl %ebp, %ebx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ebx, %edi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, (%esp) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %ebx, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    imull %ebp, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %edi, %ebx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ebp
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movzbl %bl, %ecx
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    imull %ebx, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %esi, %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %edi, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    imull %esi, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %edi, %esi
+; X86-NEXT:    adcl %ebp, %ebx
+; X86-NEXT:    setb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movzbl %cl, %ecx
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl (%esp), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %esi, %ecx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %ebx, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl %ebp, %ebx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    adcl %ebp, %ecx
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ecx, %ebp
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %edx
+; X86-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl (%esp), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    adcl %esi, %edi
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    adcl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    imull %ebx, %esi
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %esi, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    imull %ebp, %edi
+; X86-NEXT:    addl %ecx, %edi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl %edi, %ebp
+; X86-NEXT:    setb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movzbl %cl, %ecx
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    imull %ebp, %eax
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    mull %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    imull %ebx, %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    imull %edi, %esi
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    addl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl %ebx, %edi
+; X86-NEXT:    setb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    movzbl %cl, %ecx
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl (%esp), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    xorl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    xorl %edx, %ebx
+; X86-NEXT:    xorl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    xorl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    xorl %edx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    xorl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    xorl %edx, %ebp
+; X86-NEXT:    subl %edx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %ecx
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 12(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 16(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 20(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 24(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, 28(%eax)
+; X86-NEXT:    movl %ebp, 32(%eax)
+; X86-NEXT:    movl %ebx, 36(%eax)
+; X86-NEXT:    movl %ecx, 40(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 44(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 48(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 52(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 56(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 60(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 64(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 68(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 72(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 76(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 80(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 84(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 88(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 92(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 96(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 100(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 104(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 108(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 112(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 116(%eax)
+; X86-NEXT:    movl %edi, 120(%eax)
+; X86-NEXT:    andl $511, %esi # imm = 0x1FF
+; X86-NEXT:    movw %si, 124(%eax)
+; X86-NEXT:    addl $1676, %esp # imm = 0x68C
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: srem1001:
+; X64:       # %bb.0: # %_udiv-special-cases
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    subq $1160, %rsp # imm = 0x488
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    shlq $23, %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    shlq $23, %rcx
+; X64-NEXT:    sarq $63, %rcx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    xorq %rcx, %rsi
+; X64-NEXT:    movq %rsi, %r15
+; X64-NEXT:    xorq %rcx, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorq %rcx, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorq %rcx, %rbp
+; X64-NEXT:    movq %rbp, %r13
+; X64-NEXT:    xorq %rcx, %rbx
+; X64-NEXT:    movq %rbx, %r8
+; X64-NEXT:    xorq %rcx, %r10
+; X64-NEXT:    movq %r10, %rsi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, %r9
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, %r12
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, %r11
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, (%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    xorq %rcx, %rdi
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; X64-NEXT:    xorq %rax, %rbp
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14
+; X64-NEXT:    xorq %rax, %r14
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    subq %rcx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rcx, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r13, %rsi
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    sbbq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    sbbq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    sbbq %rcx, (%rsp) # 8-byte Folded Spill
+; X64-NEXT:    sbbq %rcx, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    sbbq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    sbbq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    sbbq %rcx, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r12, %r13
+; X64-NEXT:    sbbq %rcx, %r13
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rcx, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r15, %rdi
+; X64-NEXT:    sbbq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movabsq $2199023255551, %rcx # imm = 0x1FFFFFFFFFF
+; X64-NEXT:    andq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    subq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %r15
+; X64-NEXT:    sbbq %rax, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %r14
+; X64-NEXT:    sbbq %rax, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %r9
+; X64-NEXT:    orq %rcx, %r10
+; X64-NEXT:    orq %rdx, %r10
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r14, %rcx
+; X64-NEXT:    orq %rdi, %rcx
+; X64-NEXT:    orq %rsi, %rcx
+; X64-NEXT:    orq %r15, %rcx
+; X64-NEXT:    orq %r10, %rcx
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, %rdx
+; X64-NEXT:    movabsq $2199023255551, %rax # imm = 0x1FFFFFFFFFF
+; X64-NEXT:    andq %rax, %rdx
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    orq %r8, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    movq %rbp, %rsi
+; X64-NEXT:    orq %rdx, %rsi
+; X64-NEXT:    orq %r11, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    orq %r14, %rsi
+; X64-NEXT:    orq %rax, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    orq %r11, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    orq %rbx, %rsi
+; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    sete {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq %r13, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq (%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    orq %rax, %rsi
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    sete {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    bsrq %rbp, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    bsrq %r9, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %rbp, %rbp
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    bsrq %r12, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    bsrq %r8, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    addq $64, %rdi
+; X64-NEXT:    testq %r12, %r12
+; X64-NEXT:    cmovneq %rax, %rdi
+; X64-NEXT:    subq $-128, %rdi
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    orq %rbp, %rax
+; X64-NEXT:    cmovneq %rcx, %rdi
+; X64-NEXT:    movq %r14, %rsi
+; X64-NEXT:    bsrq %r14, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq %r15, %r14
+; X64-NEXT:    bsrq %r15, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %rsi, %rsi
+; X64-NEXT:    movq %rsi, %r15
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    bsrq %rbx, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    bsrq %r11, %rsi
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    addq $64, %rsi
+; X64-NEXT:    testq %rbx, %rbx
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    subq $-128, %rsi
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r15, %rax
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    addq $256, %rsi # imm = 0x100
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r9, %rcx
+; X64-NEXT:    orq %r9, %rax
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rbp, %r12
+; X64-NEXT:    orq %rax, %r12
+; X64-NEXT:    cmovneq %rdi, %rsi
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    addq $512, %rsi # imm = 0x200
+; X64-NEXT:    setb %bpl
+; X64-NEXT:    bsrq %rdx, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    bsrq %r8, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %rdx, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    bsrq %rbx, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    bsrq %r9, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    addq $64, %rdi
+; X64-NEXT:    testq %rbx, %rbx
+; X64-NEXT:    movq %rbx, %r10
+; X64-NEXT:    cmovneq %rax, %rdi
+; X64-NEXT:    subq $-128, %rdi
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    cmovneq %rcx, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    bsrq %rbx, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    bsrq %r15, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %rbx, %rbx
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    bsrq %rax, %r11
+; X64-NEXT:    xorq $63, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    bsrq %r13, %r12
+; X64-NEXT:    xorq $63, %r12
+; X64-NEXT:    addq $64, %r12
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    cmovneq %r11, %r12
+; X64-NEXT:    subq $-128, %r12
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    cmovneq %rcx, %r12
+; X64-NEXT:    addq $256, %r12 # imm = 0x100
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    orq %r8, %rax
+; X64-NEXT:    movq %r10, %rcx
+; X64-NEXT:    orq %rdx, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    cmovneq %rdi, %r12
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    orq %r8, %rax
+; X64-NEXT:    orq %r9, %rax
+; X64-NEXT:    movq %r13, %rcx
+; X64-NEXT:    orq %r13, %rax
+; X64-NEXT:    orq %rbx, %rdx
+; X64-NEXT:    orq %r10, %rdx
+; X64-NEXT:    orq %r14, %rdx
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    cmoveq %rsi, %r12
+; X64-NEXT:    cmovneq %rcx, %rbp
+; X64-NEXT:    subq $23, %r12
+; X64-NEXT:    sbbq $0, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    bsrq %rdi, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    addq $64, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    bsrq %r11, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    testq %r11, %r11
+; X64-NEXT:    cmoveq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    bsrq %rdx, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    bsrq %r9, %rsi
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    addq $64, %rsi
+; X64-NEXT:    testq %rdx, %rdx
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    subq $-128, %rsi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    orq %r11, %rax
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    bsrq %rdx, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    bsrq %rdi, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %rdx, %rdx
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    bsrq %rbp, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    bsrq %r13, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    addq $64, %rdx
+; X64-NEXT:    testq %rbp, %rbp
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    subq $-128, %rdx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    cmovneq %rcx, %rdx
+; X64-NEXT:    addq $256, %rdx # imm = 0x100
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    orq %r10, %rax
+; X64-NEXT:    movq %r10, %r15
+; X64-NEXT:    movq %r8, %rcx
+; X64-NEXT:    orq %r11, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    cmovneq %rsi, %rdx
+; X64-NEXT:    movq (%rsp), %r11 # 8-byte Reload
+; X64-NEXT:    bsrq %r11, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    bsrq %r9, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %r11, %r11
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    bsrq %r14, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    bsrq %r10, %rdi
+; X64-NEXT:    xorq $63, %rdi
+; X64-NEXT:    addq $64, %rdi
+; X64-NEXT:    testq %r14, %r14
+; X64-NEXT:    cmovneq %rax, %rdi
+; X64-NEXT:    subq $-128, %rdi
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r11, %rax
+; X64-NEXT:    movq %r11, (%rsp) # 8-byte Spill
+; X64-NEXT:    cmovneq %rcx, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    bsrq %r8, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    bsrq %rbp, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %r8, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    bsrq %rbx, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    bsrq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    addq $64, %rsi
+; X64-NEXT:    testq %rbx, %rbx
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    subq $-128, %rsi
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    orq %r8, %rax
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    addq $256, %rsi # imm = 0x100
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    orq %r9, %rax
+; X64-NEXT:    orq %r11, %r14
+; X64-NEXT:    orq %rax, %r14
+; X64-NEXT:    cmovneq %rdi, %rsi
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    addq $512, %rsi # imm = 0x200
+; X64-NEXT:    setb %dil
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq %r15, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq %r13, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    cmovneq %rdx, %rsi
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmovneq %rax, %rdi
+; X64-NEXT:    subq $23, %rsi
+; X64-NEXT:    sbbq $0, %rdi
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    sbbq %rdx, %rdx
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    movl $0, %ebx
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    movl $0, %r8d
+; X64-NEXT:    sbbq %r8, %r8
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movl $0, %r10d
+; X64-NEXT:    sbbq %r10, %r10
+; X64-NEXT:    movl $0, %r11d
+; X64-NEXT:    sbbq %r11, %r11
+; X64-NEXT:    movl $0, %r14d
+; X64-NEXT:    sbbq %r14, %r14
+; X64-NEXT:    movl $0, %r15d
+; X64-NEXT:    sbbq %r15, %r15
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %r13d
+; X64-NEXT:    sbbq %r13, %r13
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    subq %rsi, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %rdi, %rax
+; X64-NEXT:    sbbq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    sbbq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    sbbq %rbx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    sbbq %r8, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    sbbq %rcx, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    sbbq %r10, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    sbbq %r11, %r9
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    sbbq %r14, %r11
+; X64-NEXT:    movq %rbx, %r10
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    sbbq %r15, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %r12, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r13, %rax
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    movl $0, %r12d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movl $1000, %eax # imm = 0x3E8
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpq %rbx, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rdi, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rcx, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %r14, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %r10, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %r8, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %r9, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %r11, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %rbp, %rcx
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rbp, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rdx, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %r13, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %r15, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rsi, %rax
+; X64-NEXT:    setb %al
+; X64-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
+; X64-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %r12, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %r12, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq (%rsp), %r14 # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %r12, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq %r12, %r11
+; X64-NEXT:    jne .LBB5_1
+; X64-NEXT:  # %bb.7: # %_udiv-special-cases
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movabsq $2199023255551, %rcx # imm = 0x1FFFFFFFFFF
+; X64-NEXT:    andq %rcx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    xorq $1000, %rcx # imm = 0x3E8
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    je .LBB5_8
+; X64-NEXT:  # %bb.5: # %udiv-bb1
+; X64-NEXT:    movl $1000, %r13d # imm = 0x3E8
+; X64-NEXT:    subl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, %r11
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movq %rdx, %r12
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r9, %rax
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rbp, %r14
+; X64-NEXT:    movq %rbp, %rsi
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    cmovneq %rsi, %r14
+; X64-NEXT:    cmovneq %rax, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r10, %rbx
+; X64-NEXT:    cmovneq %rdi, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r8, %rcx
+; X64-NEXT:    cmovneq %rdi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rdi, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rdi, %r15
+; X64-NEXT:    movq %r11, %r10
+; X64-NEXT:    cmovneq %rdi, %r10
+; X64-NEXT:    movl $744, %ecx # imm = 0x2E8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    subl %ebx, %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, %rdx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rdi, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rdi, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $488, %r12d # imm = 0x1E8
+; X64-NEXT:    subl %ebx, %r12d
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shlq %cl, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %r11
+; X64-NEXT:    shlq %cl, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq %rdi, %r11
+; X64-NEXT:    cmovneq %rdi, %r9
+; X64-NEXT:    movq %r9, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    cmovneq %rdi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r13b, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq %r8, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %rbp
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r9, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %rbx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rax, %rbx
+; X64-NEXT:    cmovneq %rdx, %rbp
+; X64-NEXT:    orq %r10, %rbp
+; X64-NEXT:    orq %r15, %rbx
+; X64-NEXT:    cmpl $128, %r12d
+; X64-NEXT:    cmovaeq %rdi, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    leal -512(%r13), %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    leal -256(%rax), %ecx
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    leal -128(%r13), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shlq %cl, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %r8, %rsi
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovbq %rbx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovbq %rbp, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    cmovaeq %rcx, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmovaeq %rcx, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    cmovaeq %rcx, %r9
+; X64-NEXT:    cmpl $256, %r13d # imm = 0x100
+; X64-NEXT:    cmovaeq %rcx, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rcx, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rcx, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorl %r15d, %r15d
+; X64-NEXT:    movq %r10, %rdx
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq %r15, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %al
+; X64-NEXT:    subb %r12b, %al
+; X64-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq %r14, %rdi
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r8, %rdi
+; X64-NEXT:    movq %r8, %rbp
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %al
+; X64-NEXT:    cmovneq %rbp, %rdi
+; X64-NEXT:    orq %r11, %rdi
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    leal -128(%r12), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r15, %rax
+; X64-NEXT:    cmpl $128, %r12d
+; X64-NEXT:    cmovbq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testl %r13d, %r13d
+; X64-NEXT:    cmoveq %r10, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    cmoveq %r9, %rbx
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    subl $256, %ecx # imm = 0x100
+; X64-NEXT:    cmovaeq %r15, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %rax
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    movq %rcx, %r11
+; X64-NEXT:    cmovaeq %r15, %rax
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    subb %r12b, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq %r14, %rsi
+; X64-NEXT:    shrdq %cl, %r8, %rsi
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    movq %r8, %rbp
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdx, %rsi
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %r12, %rdi
+; X64-NEXT:    leal -128(%r12), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    cmovneq %r15, %rcx
+; X64-NEXT:    cmpl $128, %edi
+; X64-NEXT:    cmovbq %rsi, %rcx
+; X64-NEXT:    testl %edi, %edi
+; X64-NEXT:    movq %rbx, %rdx
+; X64-NEXT:    cmoveq %rbx, %rcx
+; X64-NEXT:    cmpl $256, %r13d # imm = 0x100
+; X64-NEXT:    cmovbq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r11, %rcx
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shrq %cl, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %r8, %r11
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    cmovneq %rsi, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    cmovneq %rsi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rdx, %r12
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    movq %r13, %r10
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    cmovneq %rsi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, %r15
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shrq %cl, %r15
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmovneq %rsi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r15, %rcx
+; X64-NEXT:    cmovneq %rsi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl %r10d, %ecx
+; X64-NEXT:    shldq %cl, %rbx, %rsi
+; X64-NEXT:    testb $64, %r10b
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movl %r10d, %ecx
+; X64-NEXT:    shldq %cl, %rdx, %rax
+; X64-NEXT:    testb $64, %r10b
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movl %r10d, %ecx
+; X64-NEXT:    shldq %cl, %r9, %r13
+; X64-NEXT:    testb $64, %r10b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    movq %r14, %rbx
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rax, %rdx
+; X64-NEXT:    movl $0, %edi
+; X64-NEXT:    cmovneq %rdi, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, %rdi
+; X64-NEXT:    shldq %cl, %r14, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %rbx
+; X64-NEXT:    cmovneq %rax, %rdi
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    subb %dl, %cl
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r8, %r9
+; X64-NEXT:    movq %r12, %r14
+; X64-NEXT:    shldq %cl, %r12, %r9
+; X64-NEXT:    shlq %cl, %r14
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movl %ecx, %r8d
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    cmovneq %r14, %r9
+; X64-NEXT:    orq %r11, %r9
+; X64-NEXT:    leal -128(%rdx), %r12d
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shrq %cl, %r11
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movl $0, %r10d
+; X64-NEXT:    cmovneq %r10, %rax
+; X64-NEXT:    cmpl $128, %edx
+; X64-NEXT:    cmovbq %r9, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq %r10, %rsi
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    cmovaeq %r10, %rdi
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    cmoveq %rbp, %rax
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    cmpl $256, %r9d # imm = 0x100
+; X64-NEXT:    cmovaeq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %r10, %r14
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %r10, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rsi
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    cmovneq %r10, %rdi
+; X64-NEXT:    movq %rbp, %rbx
+; X64-NEXT:    shldq %cl, %r8, %rbx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rsi, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %rsi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shrdq %cl, %rbp, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r15, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq %rdi, %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq %rbx, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq %rsi, %r13
+; X64-NEXT:    testl %r9d, %r9d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmoveq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    cmoveq %r10, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmoveq %rsi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmovaeq %rax, %rbx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    shrdq %cl, %rbp, %rax
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shrdq %cl, %rdx, %rsi
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq %r11, %rsi
+; X64-NEXT:    orq %r14, %rax
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %rsi, %rax
+; X64-NEXT:    testl %r15d, %r15d
+; X64-NEXT:    cmoveq %r8, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r9d # imm = 0x100
+; X64-NEXT:    cmovaeq %rbx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    movl $0, %r8d
+; X64-NEXT:    cmovneq %r8, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $256, %r9d # imm = 0x100
+; X64-NEXT:    cmovaeq %r8, %r13
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    cmovneq %r8, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rsi
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    cmovneq %r8, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    shrdq %cl, %rdx, %rbx
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    cmovneq %rax, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    cmovneq %r8, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shrdq %cl, %r10, %rbp
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq %rax, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %r8, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %r8, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    cmovneq %r8, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r8, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r8, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    leal -128(%r14), %ecx
+; X64-NEXT:    movq (%rsp), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r13, %rsi
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    shrq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    cmovneq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $768, %ecx # imm = 0x300
+; X64-NEXT:    subl %r9d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    shrq %cl, %r12
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    cmovneq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    leal -128(%rcx), %esi
+; X64-NEXT:    movq %rcx, %rdi
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r11, %r8
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    shrq %cl, %r11
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    cmovneq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %dil, %cl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rbx, %r8
+; X64-NEXT:    movq %rbx, %rbp
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %rdx, %rbp
+; X64-NEXT:    movl $232, %ecx
+; X64-NEXT:    subl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r9, %rdi
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %rbx, %rdx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r14, %r15
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    shrq %cl, %r13
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r15b, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    shlq %cl, %r8
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %r8, %rcx
+; X64-NEXT:    movl $0, %r10d
+; X64-NEXT:    cmovneq %r10, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %r14
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmovaeq %r10, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %r10
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    cmovneq %r12, %r10
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %rbx
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    cmovneq %r11, %rbx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %rax
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    cmovneq %r13, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %r12
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %r12
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rsi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %r13
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rsi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r8, %rdi
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    cmpl $128, %r11d
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r11d
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %r8
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rsi, %r8
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rbp, %r10
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq %rbx, %r10
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    testl %r11d, %r11d
+; X64-NEXT:    movq %r11, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    cmoveq %r15, %rdx
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    cmovaeq %rbp, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testl %r9d, %r9d
+; X64-NEXT:    cmoveq %rsi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmpl $128, %eax
+; X64-NEXT:    cmovaeq %r12, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r13d # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %eax
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    cmovaeq %rax, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq %r12, %rbx
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shlq %cl, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rbx, %r14
+; X64-NEXT:    cmovneq %rax, %r14
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r15, %rax
+; X64-NEXT:    movq %r15, %r9
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %r12
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdx, %r12
+; X64-NEXT:    cmpl $128, %esi
+; X64-NEXT:    cmovaeq %rbp, %rax
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shrdq %cl, %rdi, %r9
+; X64-NEXT:    movq %rdi, %rsi
+; X64-NEXT:    movq %rdi, %rbx
+; X64-NEXT:    shrq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rsi, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rbp, %rsi
+; X64-NEXT:    movq %rbp, %rdi
+; X64-NEXT:    movq %rbp, %r8
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rsi
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq %r13, %rdx
+; X64-NEXT:    cmpl $128, %edx
+; X64-NEXT:    cmovaeq %r14, %r12
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    movq %r10, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmoveq %rbx, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movl $0, %r14d
+; X64-NEXT:    cmovaeq %r14, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmovaeq %r14, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    testl %ecx, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmoveq %r8, %rbp
+; X64-NEXT:    cmpl $256, %ecx # imm = 0x100
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    cmovaeq %r14, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r11, %rdi
+; X64-NEXT:    cmpl $256, %edx # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %rdx
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rbp, %rdx
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    cmovneq %rbp, %rdx
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    cmpl $128, %r14d
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, %r11
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl %r10d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %rsi
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r10b
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovaeq %rcx, %r8
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq %rax, %r12
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %rcx, %rbp
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    movb $-128, %bl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    subb %sil, %bl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r13, %rax
+; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    shrq %cl, %r13
+; X64-NEXT:    testb $64, %bl
+; X64-NEXT:    cmovneq %r13, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    leal -128(%rsi), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rdx
+; X64-NEXT:    cmpl $128, %esi
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    cmovbq %rax, %rdx
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    testl %r14d, %r14d
+; X64-NEXT:    cmoveq %r15, %r9
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmpl $128, %edx
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmovaeq %rax, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmpl $256, %esi # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r12, %rbp
+; X64-NEXT:    testl %edi, %edi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    cmoveq %r10, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    cmpl $256, %r14d # imm = 0x100
+; X64-NEXT:    cmovaeq %r9, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    testb $64, %dil
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmovneq %rax, %rbp
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    testb $64, %r9b
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    cmpl $128, %esi
+; X64-NEXT:    movq %rsi, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmovaeq %rax, %rdx
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %bl
+; X64-NEXT:    cmovneq %rax, %r13
+; X64-NEXT:    cmpl $128, %edi
+; X64-NEXT:    cmovaeq %rax, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $256, %r8d # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %r14, %rbp
+; X64-NEXT:    movl %ebp, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rax
+; X64-NEXT:    testb $64, %bpl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %r8
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq %r12, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r15, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %r15
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq (%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rsi, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rbx, %rsi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r9, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movl %r11d, %ecx
+; X64-NEXT:    shldq %cl, %r10, %rbx
+; X64-NEXT:    testb $64, %r11b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq %rbx, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %rbx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %ebp
+; X64-NEXT:    cmovaeq %r8, %r14
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %rdx, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    cmovaeq %rsi, %rdx
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    orq %r13, %r10
+; X64-NEXT:    cmpl $128, %r11d
+; X64-NEXT:    cmovaeq %rbx, %r10
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    testl %r15d, %r15d
+; X64-NEXT:    cmoveq %r12, %rdi
+; X64-NEXT:    testl %ecx, %ecx
+; X64-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; X64-NEXT:    cmoveq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testl %r11d, %r11d
+; X64-NEXT:    cmoveq %r9, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %ebp # imm = 0x100
+; X64-NEXT:    movq %rbp, %r9
+; X64-NEXT:    cmovaeq %rdi, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    cmovneq %rbp, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shrq %cl, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    cmovneq %rbp, %r8
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shrq %cl, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %rbx
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %rbp, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, %rcx
+; X64-NEXT:    testl %ecx, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r15
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    cmoveq %r10, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $512, %ecx # imm = 0x200
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq %r12, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movl %r11d, %ecx
+; X64-NEXT:    shrq %cl, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r11b
+; X64-NEXT:    cmovneq %rbp, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %rdi
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rax
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %rax
+; X64-NEXT:    orq %r8, %rax
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %rbx, %rax
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rbp, %rbx
+; X64-NEXT:    movq %rbp, %rsi
+; X64-NEXT:    movq %rbp, %r8
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rsi, %rbx
+; X64-NEXT:    testl %r13d, %r13d
+; X64-NEXT:    movq %r15, %r14
+; X64-NEXT:    cmoveq %r15, %rax
+; X64-NEXT:    orq %r9, %rbx
+; X64-NEXT:    cmpl $128, %r11d
+; X64-NEXT:    cmovaeq %rdi, %rbx
+; X64-NEXT:    movq %r12, %rdx
+; X64-NEXT:    movq %r12, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdi, %rsi
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %r10, %rbp
+; X64-NEXT:    shldq %cl, %r10, %r14
+; X64-NEXT:    movq %r10, %rdi
+; X64-NEXT:    movq %r10, %r15
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    cmovneq %rdi, %r14
+; X64-NEXT:    testl %r11d, %r11d
+; X64-NEXT:    cmoveq %rdx, %rbx
+; X64-NEXT:    cmpl $128, %r12d
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    cmovaeq %rdx, %rsi
+; X64-NEXT:    cmpl $128, %ebp
+; X64-NEXT:    cmovaeq %rdx, %r14
+; X64-NEXT:    orq %rbx, %r14
+; X64-NEXT:    cmpl $256, %r12d # imm = 0x100
+; X64-NEXT:    cmovaeq %rdx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $256, %r11d # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %r14
+; X64-NEXT:    cmpl $256, %r12d # imm = 0x100
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r8, %rsi
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movl %r11d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %rax
+; X64-NEXT:    testb $64, %r11b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %r8
+; X64-NEXT:    movq %r13, %r9
+; X64-NEXT:    movl %r9d, %ecx
+; X64-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %r8
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r9b
+; X64-NEXT:    cmovneq %rdi, %r8
+; X64-NEXT:    testl %r12d, %r12d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmoveq %rsi, %rdi
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %bpl, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq %r15, %rbx
+; X64-NEXT:    movq %r15, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r15, %rbx
+; X64-NEXT:    movq %r15, %rsi
+; X64-NEXT:    shrq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rsi, %rbx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    leal -128(%rbp), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq %r13, %rdx
+; X64-NEXT:    shlq %cl, %r13
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %r13
+; X64-NEXT:    cmpl $128, %ebp
+; X64-NEXT:    cmovbq %rbx, %r13
+; X64-NEXT:    cmpl $128, %r11d
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq %rcx, %r8
+; X64-NEXT:    cmpl $256, %r12d # imm = 0x100
+; X64-NEXT:    cmovaeq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    cmoveq %r10, %r13
+; X64-NEXT:    orq %rax, %r13
+; X64-NEXT:    cmpl $256, %r11d # imm = 0x100
+; X64-NEXT:    cmovaeq %r8, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    testb $64, %bpl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq (%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movl %r9d, %ecx
+; X64-NEXT:    shldq %cl, %r10, %r12
+; X64-NEXT:    testb $64, %r9b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq %r15, %rbx
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rbx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r11, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    movq %rsi, %r15
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r9d
+; X64-NEXT:    cmovaeq %rbx, %r12
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    cmovaeq %rdi, %rax
+; X64-NEXT:    testl %r9d, %r9d
+; X64-NEXT:    cmoveq %rbp, %r12
+; X64-NEXT:    testl %r8d, %r8d
+; X64-NEXT:    movq %r10, %rbx
+; X64-NEXT:    cmoveq %r10, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r9d # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %r12
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movl %r10d, %ecx
+; X64-NEXT:    shrdq %cl, %r15, %r8
+; X64-NEXT:    testb $64, %r10b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shrdq %cl, %rbx, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rbp, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq (%rsp), %r11 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r11, %rbx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r10d
+; X64-NEXT:    cmovaeq %rdi, %r8
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    cmpl $128, %ebp
+; X64-NEXT:    cmovaeq %rbx, %rax
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmoveq %rcx, %r8
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    cmoveq %rdx, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r10d # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %r8
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    cmoveq %r15, %r14
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    cmoveq %rcx, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmoveq %rdi, %r13
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    testb $64, %bpl
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    cmovneq %rcx, %r15
+; X64-NEXT:    cmpl $128, %r10d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, %ebp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmovaeq %rcx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r11, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %rbx
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %rsi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdi, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r11, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r14d
+; X64-NEXT:    cmovaeq %rsi, %rbx
+; X64-NEXT:    testl %r14d, %r14d
+; X64-NEXT:    movq (%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    cmoveq %rcx, %rbx
+; X64-NEXT:    orq %r15, %rax
+; X64-NEXT:    cmpl $128, %ebp
+; X64-NEXT:    cmovaeq %rdi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r10d # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r14
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    cmoveq %rdx, %rax
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    cmoveq %rdx, %rbx
+; X64-NEXT:    cmpl $256, %ebp # imm = 0x100
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq %rax, %rbx
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmoveq %rdi, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %ebp # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testl %r9d, %r9d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r10d # imm = 0x100
+; X64-NEXT:    cmovaeq %rdx, %rsi
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    cmoveq %rdi, %rax
+; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq %rax, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %ebp # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testl %r9d, %r9d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    cmoveq %r15, %rcx
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %ebp # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testl %r9d, %r9d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmoveq %rdx, %rdi
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq %rax, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %ebp # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testl %r9d, %r9d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    cmoveq %r10, %r11
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    movq %r14, %rbp
+; X64-NEXT:    cmoveq %r14, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq %rax, %r11
+; X64-NEXT:    testl %r9d, %r9d
+; X64-NEXT:    cmoveq %r10, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq %rax, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq %r15, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq %rdx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq %r14, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmoveq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpl $256, %r9d # imm = 0x100
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovaeq %rax, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmovaeq %rax, %rcx
+; X64-NEXT:    cmpl $512, %r9d # imm = 0x200
+; X64-NEXT:    cmovaeq %rax, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmovaeq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmovaeq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmovaeq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq $1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    adcq $0, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    adcq $0, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    adcq $0, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    adcq $0, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    adcq $0, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $0, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movabsq $2199023255551, %rcx # imm = 0x1FFFFFFFFFF
+; X64-NEXT:    andq %rcx, %rax
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rbp, %rcx
+; X64-NEXT:    orq %rbx, %rcx
+; X64-NEXT:    orq %r13, %rax
+; X64-NEXT:    orq %r15, %rax
+; X64-NEXT:    movq %r12, %rbp
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r12, %rax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movq %r10, %rbp
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rdx, %rbp
+; X64-NEXT:    orq %rdi, %rbp
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r14, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %r9, %rdx
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r11, %rdx
+; X64-NEXT:    orq %r8, %rdx
+; X64-NEXT:    orq %rbp, %rdx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    je .LBB5_6
+; X64-NEXT:  # %bb.2: # %udiv-preheader
+; X64-NEXT:    movq %r13, %rdx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %rbx, %r13
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r12, %rdx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rsi
+; X64-NEXT:    movq (%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq %rbp, %rcx
+; X64-NEXT:    cmovneq %rdi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    cmovneq %rdi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    cmovneq %rdi, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rdi, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbx, %r15
+; X64-NEXT:    cmovneq %rdi, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrdq %cl, %r10, %rax
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rbp, %rax
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrdq %cl, %r9, %rax
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rsi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrdq %cl, %r12, %rdi
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rdx, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    leal -256(%rcx), %r8d
+; X64-NEXT:    movq %r14, %rsi
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrdq %cl, %r9, %rsi
+; X64-NEXT:    movq %r9, %rdx
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %rdx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq %rcx, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rcx, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmovaeq %rcx, %rbp
+; X64-NEXT:    xorl %r11d, %r11d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    leal -512(%rax), %ecx
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r13, %rdx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq %r11, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Folded Reload
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq %r9, %rsi
+; X64-NEXT:    shldq %cl, %r14, %rsi
+; X64-NEXT:    shlq %cl, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r12, %rdx
+; X64-NEXT:    shldq %cl, %r10, %rdx
+; X64-NEXT:    shlq %cl, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r10, %rdx
+; X64-NEXT:    cmovneq %r14, %rsi
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    leal -128(%rax), %r14d
+; X64-NEXT:    movq %r9, %rbx
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    shrq %cl, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    cmovneq %r11, %rcx
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovbq %rsi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r15, %rdx
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq %rcx, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %r10
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrq %cl, %r10
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    leal -512(%rax), %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    cmovaeq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    leal -256(%rbx), %ecx
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shrq %cl, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %r12
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    cmovaeq %r11, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovaeq %r11, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    cmovneq %r11, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r8b, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    movq %r15, %r12
+; X64-NEXT:    shrdq %cl, %r15, %rax
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq %r10, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %rbx, %rcx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    leal -128(%r8), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq (%rsp), %rbx # 8-byte Reload
+; X64-NEXT:    movq %rbx, %rbp
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    cmovneq %r11, %r15
+; X64-NEXT:    movq %rsi, %rbp
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    shrdq %cl, %r9, %rbp
+; X64-NEXT:    movq %r9, %rsi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rax, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movl %ebp, %ecx
+; X64-NEXT:    shrq %cl, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %bpl
+; X64-NEXT:    cmovneq %r11, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movl %ebp, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %bpl
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addl $-128, %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq %r13, %r9
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    # kill: def $r13d killed $r13d killed $r13 def $r13
+; X64-NEXT:    subl $256, %r13d # imm = 0x100
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shldq %cl, %r10, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    movq %rbx, %rsi
+; X64-NEXT:    cmovaeq %r15, %rax
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmoveq %r8, %rdx
+; X64-NEXT:    negl %r13d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shldq %cl, %r9, %rax
+; X64-NEXT:    movq %r9, %rbx
+; X64-NEXT:    movq %r9, %rdi
+; X64-NEXT:    shlq %cl, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    cmovneq %rbx, %rax
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %r11, %rax
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    testl %esi, %esi
+; X64-NEXT:    movq %r15, %rsi
+; X64-NEXT:    cmoveq %rbp, %rsi
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovbq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shlq %cl, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    movq %r10, %r12
+; X64-NEXT:    cmovneq %r11, %r12
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq %rbx, %rdx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    cmovneq %r11, %r10
+; X64-NEXT:    movq (%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    shldq %cl, %rbx, %rax
+; X64-NEXT:    movq %rbx, %r9
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrdq %cl, %rbp, %rax
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %r8
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rbp, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %r9, %rdi
+; X64-NEXT:    movq %r9, %r14
+; X64-NEXT:    shrdq %cl, %rsi, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    subb %sil, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq %rbp, %rdx
+; X64-NEXT:    movq %rbp, %r9
+; X64-NEXT:    shldq %cl, %rbx, %rdx
+; X64-NEXT:    shlq %cl, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbx, %rdx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    leal -128(%r15), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rbp, %rcx
+; X64-NEXT:    cmovneq %r11, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    leal -128(%rsi), %ecx
+; X64-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    shrq %cl, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %r9
+; X64-NEXT:    cmpl $128, %esi
+; X64-NEXT:    cmovbq %rdx, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rbx, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq %rax, %r15
+; X64-NEXT:    orq %r10, %r8
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq %rdi, %r8
+; X64-NEXT:    movq %r8, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r13b, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq %rbx, %rdx
+; X64-NEXT:    movq %rbx, %r8
+; X64-NEXT:    movq %rbp, %rdi
+; X64-NEXT:    shrdq %cl, %rbp, %rdx
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rdx
+; X64-NEXT:    orq %r12, %rdx
+; X64-NEXT:    leal -128(%r13), %ebp
+; X64-NEXT:    movq %rbx, %rdi
+; X64-NEXT:    movl %ebp, %ecx
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %bpl
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    cmovneq %r11, %rcx
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovbq %rdx, %rcx
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmoveq %rbx, %r15
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    cmoveq %rbx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    cmoveq %r10, %rax
+; X64-NEXT:    testl %r13d, %r13d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    cmoveq %r12, %rcx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %r11, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    movq %r14, %rsi
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    movq %rsi, %r8
+; X64-NEXT:    cmovneq %r11, %r8
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %rbx, %r14
+; X64-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movl %ebp, %ecx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %bpl
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    cmovneq %r11, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %r9, %rcx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %r9
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shlq %cl, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r11, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rax, %rbp
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdi, %rax
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %rdi
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdx, %rdi
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    cmovaeq %rdx, %rbp
+; X64-NEXT:    cmpl $128, %esi
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    cmovaeq %rdx, %rdi
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    shrdq %cl, %r10, %rdx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    orq %r8, %rdx
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %rbx, %rdx
+; X64-NEXT:    testl %r13d, %r13d
+; X64-NEXT:    cmoveq %r11, %rdx
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movl $768, %r8d # imm = 0x300
+; X64-NEXT:    subl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r8b, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq %r11, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r14, %rdx
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    movq %r12, %rbx
+; X64-NEXT:    movq %r12, %rsi
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movl $0, %r12d
+; X64-NEXT:    cmovneq %r12, %rsi
+; X64-NEXT:    orq %rdx, %rsi
+; X64-NEXT:    leal -128(%r8), %eax
+; X64-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r11, %rdx
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %al
+; X64-NEXT:    cmovneq %r12, %rdx
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    cmovbq %rsi, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    subb %dl, %cl
+; X64-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; X64-NEXT:    movq %r15, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %rsi
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, %r15
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %r9, %rsi
+; X64-NEXT:    testl %r15d, %r15d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    cmoveq %r9, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rbp, %r10
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovaeq %rdi, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    cmovaeq %rdx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shldq %cl, %rbx, %rax
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmovneq %rdx, %rbp
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shldq %cl, %r11, %r14
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq %rbp, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %r14, %rcx
+; X64-NEXT:    testl %r13d, %r13d
+; X64-NEXT:    cmoveq %rdi, %rcx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovaeq %rsi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    movq %rcx, %r10
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovaeq %rax, %rsi
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    cmovneq %rax, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shlq %cl, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rax, %r12
+; X64-NEXT:    xorl %r15d, %r15d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %rax
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    testb $64, %al
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    cmovneq %r15, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shlq %cl, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r15, %r11
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdi, %rbp
+; X64-NEXT:    testb $64, %al
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmovneq %r15, %rdi
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %r9, %r14
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r15, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %r9
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %r15, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    leal -768(%rax), %r10d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl %r10d, %ecx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r10b
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    cmovneq %r15, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovaeq %r15, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, %rdx
+; X64-NEXT:    movl %r10d, %ecx
+; X64-NEXT:    shrq %cl, %rdx
+; X64-NEXT:    testb $64, %r10b
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    cmovneq %r15, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl %r10d, %ecx
+; X64-NEXT:    shrdq %cl, %rax, %rsi
+; X64-NEXT:    testb $64, %r10b
+; X64-NEXT:    cmovneq %rdx, %rsi
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq %r15, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rbx, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq %r12, %rcx
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq %rdx, %rcx
+; X64-NEXT:    movq %rcx, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    cmovaeq %r15, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq %r11, %rbp
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    cmovaeq %rax, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    cmovaeq %r15, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdx, %r14
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %r14
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %ebx
+; X64-NEXT:    cmovaeq %r14, %r9
+; X64-NEXT:    movq %r9, %r14
+; X64-NEXT:    movb $-128, %r8b
+; X64-NEXT:    subb %r10b, %r8b
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shlq %cl, %r11
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %r11, %rdx
+; X64-NEXT:    cmovneq %r15, %rdx
+; X64-NEXT:    orq %rsi, %rdx
+; X64-NEXT:    leal -128(%r10), %ecx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %rdi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    shrq %cl, %rbx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbx, %rdi
+; X64-NEXT:    cmpl $128, %r10d
+; X64-NEXT:    cmovbq %rdx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovaeq %rdx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdx, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdx, %rsi
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    cmovaeq %rdx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq %r12, %rdx
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    testl %r12d, %r12d
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shldq %cl, %r9, %rax
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq %rax, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %r15
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovaeq %rbp, %r15
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmovneq %rcx, %rbp
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %rcx, %rbp
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovneq %rcx, %rsi
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r11, %rdi
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rbx, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq %rax, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r12d
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmoveq %r8, %rdi
+; X64-NEXT:    testl %r12d, %r12d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    cmoveq %r9, %rax
+; X64-NEXT:    orq %rbp, %rdi
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    cmoveq %r11, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq %rdx, %rax
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmoveq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $512, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x200
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r8, %rsi
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    cmovneq %rdx, %rsi
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    cmovaeq %rdx, %r15
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %rdx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r13, %r11
+; X64-NEXT:    movq %r10, %r8
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r12, %r11
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdi, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %rdi
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %r14, %rbx
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rax, %rbx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmovneq %rax, %r10
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movl %r9d, %ecx
+; X64-NEXT:    shrdq %cl, %rbp, %rax
+; X64-NEXT:    testb $64, %r9b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %r13, %rbp
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shrdq %cl, %r12, %rbp
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovaeq %rcx, %r11
+; X64-NEXT:    movl %r9d, %ecx
+; X64-NEXT:    shrdq %cl, %r12, %r13
+; X64-NEXT:    testb $64, %r9b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movb $-128, %r12b
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    subb %r8b, %r12b
+; X64-NEXT:    movq %r14, %r13
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shlq %cl, %r13
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    movq %r13, %rcx
+; X64-NEXT:    movl $0, %edi
+; X64-NEXT:    cmovneq %rdi, %rcx
+; X64-NEXT:    orq %rdx, %rcx
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    cmovaeq %rbx, %rcx
+; X64-NEXT:    testl %r8d, %r8d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    cmoveq %r14, %rcx
+; X64-NEXT:    orq %r15, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    cmpl $256, %ebx # imm = 0x100
+; X64-NEXT:    cmovaeq %rsi, %rcx
+; X64-NEXT:    movq %rax, %rdx
+; X64-NEXT:    orq %r10, %rdx
+; X64-NEXT:    movq %r9, %r10
+; X64-NEXT:    cmpl $128, %r10d
+; X64-NEXT:    cmovaeq %rbp, %rdx
+; X64-NEXT:    testl %ebx, %ebx
+; X64-NEXT:    movq %rbx, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq %rax, %rcx
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    cmoveq %rax, %rdx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r10d # imm = 0x100
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmovaeq %rax, %rdx
+; X64-NEXT:    cmpl $512, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x200
+; X64-NEXT:    cmovbq %rcx, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r14, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movq %rcx, %r15
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    cmoveq %rbp, %rax
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovaeq %rcx, %rsi
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %rcx, %rdx
+; X64-NEXT:    cmpl $256, %r10d # imm = 0x100
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq %r14, %rbx
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r9, %rbx
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq %r13, %rbx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    cmpl $128, %r12d
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    testl %r12d, %r12d
+; X64-NEXT:    cmoveq %rdi, %rbx
+; X64-NEXT:    orq %rsi, %rbx
+; X64-NEXT:    movq %r8, %r13
+; X64-NEXT:    cmpl $256, %r13d # imm = 0x100
+; X64-NEXT:    cmovaeq %rdx, %rbx
+; X64-NEXT:    testl %r13d, %r13d
+; X64-NEXT:    cmoveq %rbp, %rbx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x200
+; X64-NEXT:    cmovaeq %rax, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r10d # imm = 0x100
+; X64-NEXT:    cmovaeq %r11, %rax
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    cmpl $256, %r13d # imm = 0x100
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmovaeq %rax, %rdi
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmoveq %rdx, %rcx
+; X64-NEXT:    testl %r10d, %r10d
+; X64-NEXT:    cmoveq %r9, %rsi
+; X64-NEXT:    orq %rdi, %rcx
+; X64-NEXT:    cmpl $512, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x200
+; X64-NEXT:    cmovaeq %rsi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, %rsi
+; X64-NEXT:    movq %r15, %rcx
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movl $0, %edi
+; X64-NEXT:    cmovneq %rdi, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq %r11, %rsi
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rsi
+; X64-NEXT:    xorl %ebx, %ebx
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r9, %rbp
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    movq %r14, %rdi
+; X64-NEXT:    shrdq %cl, %r14, %rbp
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    movq %r12, %r8
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r10d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmovaeq %rbx, %rsi
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    cmovneq %rbx, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rbx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    shldq %cl, %r9, %rax
+; X64-NEXT:    movq %r9, %r12
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %r10, %r15
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %r14, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpl $128, %r8d
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    cmovaeq %rax, %rbp
+; X64-NEXT:    cmpl $256, %r15d # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %r13, %r10
+; X64-NEXT:    movq %r13, %r9
+; X64-NEXT:    shrq %cl, %r9
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    movl %ecx, %r8d
+; X64-NEXT:    cmovneq %r9, %r10
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    shrdq %cl, %rdi, %rax
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    cmpl $128, %r14d
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testl %r14d, %r14d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    cmoveq %r11, %r10
+; X64-NEXT:    testl %ecx, %ecx
+; X64-NEXT:    cmoveq %r12, %rax
+; X64-NEXT:    orq %rbp, %r10
+; X64-NEXT:    cmpl $256, %r14d # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %r10
+; X64-NEXT:    testl %r14d, %r14d
+; X64-NEXT:    cmoveq %r11, %r10
+; X64-NEXT:    movq %r11, %rdx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x200
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r15d # imm = 0x100
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    cmovaeq %rbp, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    shldq %cl, %rdx, %rax
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %rbp, %r9
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    movq %r13, %rdx
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shldq %cl, %rsi, %rdx
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shldq %cl, %r12, %rsi
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    cmovneq %rbp, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq %r12, %rbp
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r15, %rbp
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    orq %rax, %r9
+; X64-NEXT:    cmpl $128, %r14d
+; X64-NEXT:    cmovaeq %rdx, %r9
+; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    cmpl $128, %r13d
+; X64-NEXT:    cmovaeq %rbp, %rdi
+; X64-NEXT:    testl %r14d, %r14d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq %rax, %r9
+; X64-NEXT:    testl %r13d, %r13d
+; X64-NEXT:    cmoveq %r8, %rdi
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r14d # imm = 0x100
+; X64-NEXT:    cmovaeq %rdi, %r9
+; X64-NEXT:    testl %r14d, %r14d
+; X64-NEXT:    movq %r14, %rbp
+; X64-NEXT:    cmoveq %rax, %r9
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x200
+; X64-NEXT:    cmovaeq %r11, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    testl %esi, %esi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    cmoveq %r14, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmpl $256, %edx # imm = 0x100
+; X64-NEXT:    cmovaeq %rax, %rcx
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    movq %r15, %rdx
+; X64-NEXT:    cmoveq %r15, %rcx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $512, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x200
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r12, %rdi
+; X64-NEXT:    movq %rbp, %r15
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    shldq %cl, %rdx, %rdi
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rbp, %rax
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    movq %r13, %r12
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %esi
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovaeq %rcx, %rdi
+; X64-NEXT:    cmpl $128, %r12d
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    testl %esi, %esi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmoveq %rsi, %r13
+; X64-NEXT:    cmpl $256, %r15d # imm = 0x100
+; X64-NEXT:    cmovaeq %rcx, %rdi
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    cmoveq %r8, %rbx
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmoveq %r11, %rcx
+; X64-NEXT:    orq %rax, %rbx
+; X64-NEXT:    cmpl $256, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x100
+; X64-NEXT:    cmovaeq %rcx, %rbx
+; X64-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rax, %rdx
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    shldq %cl, %rbp, %r11
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %r14, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmoveq %rsi, %rdx
+; X64-NEXT:    cmpl $128, %ecx
+; X64-NEXT:    movq %rcx, %r14
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovaeq %rcx, %rax
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    cmpl $256, %ebp # imm = 0x100
+; X64-NEXT:    cmovaeq %r13, %rax
+; X64-NEXT:    cmpl $0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    cmoveq %r8, %rbx
+; X64-NEXT:    testl %ebp, %ebp
+; X64-NEXT:    cmoveq %rsi, %rax
+; X64-NEXT:    orq %rdi, %rbx
+; X64-NEXT:    cmpl $512, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Reload
+; X64-NEXT:    # imm = 0x200
+; X64-NEXT:    cmovaeq %rax, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    movl %r15d, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rax
+; X64-NEXT:    testb $64, %r15b
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %rbp
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdx, %rsi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq %r14, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdi, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    testb $64, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovneq %rcx, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $128, %r12d
+; X64-NEXT:    cmovaeq %r11, %rcx
+; X64-NEXT:    orq %rax, %rbp
+; X64-NEXT:    cmpl $128, %r15d
+; X64-NEXT:    cmovaeq %rsi, %rbp
+; X64-NEXT:    testl %r12d, %r12d
+; X64-NEXT:    movq (%rsp), %r11 # 8-byte Reload
+; X64-NEXT:    cmoveq %r11, %rcx
+; X64-NEXT:    testl %r15d, %r15d
+; X64-NEXT:    cmoveq %r13, %rbp
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r8d # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    shldq %cl, %rdi, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    cmpl $256, %r15d # imm = 0x100
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovaeq %rcx, %rbp
+; X64-NEXT:    testl %r8d, %r8d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmoveq %rcx, %rsi
+; X64-NEXT:    movq %r14, %rdi
+; X64-NEXT:    movq %r8, %r14
+; X64-NEXT:    orq %rdx, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    cmpl $128, %edx
+; X64-NEXT:    cmovaeq %rax, %rdi
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmpl $256, %eax # imm = 0x100
+; X64-NEXT:    cmovaeq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    cmoveq %r13, %rdi
+; X64-NEXT:    orq %rbp, %rsi
+; X64-NEXT:    cmpl $512, %r14d # imm = 0x200
+; X64-NEXT:    cmovaeq %rdi, %rsi
+; X64-NEXT:    testl %r14d, %r14d
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq %rcx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmoveq %r11, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    cmoveq %r13, %rsi
+; X64-NEXT:    cmpl $256, %r14d # imm = 0x100
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    cmovaeq %rdx, %rcx
+; X64-NEXT:    cmpl $512, %r14d # imm = 0x200
+; X64-NEXT:    cmovaeq %rdx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    cmovaeq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    addq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movabsq $2199023255551, %rax # imm = 0x1FFFFFFFFFF
+; X64-NEXT:    adcq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB5_3: # %udiv-do-while
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    shldq $1, %r12, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    shldq $1, %r11, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    shrq $40, %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    leaq (%rax,%r11,2), %r11
+; X64-NEXT:    shldq $1, %r8, %rbp
+; X64-NEXT:    orq %r14, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %rsi, %r8
+; X64-NEXT:    orq %rcx, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %rbx, %rsi
+; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rbx
+; X64-NEXT:    orq %rcx, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %rdi, %rax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %r15, %rdi
+; X64-NEXT:    orq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %r15
+; X64-NEXT:    orq %rcx, %r15
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %r13, %rax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %r13
+; X64-NEXT:    orq %rcx, %r13
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %r9, %rax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %r10, %r9
+; X64-NEXT:    orq %rcx, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %r10
+; X64-NEXT:    orq %rcx, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %rax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    shldq $1, %rax, %rdx
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq %rax, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmpq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r12, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    shlq $23, %rcx
+; X64-NEXT:    sarq $63, %rcx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %rdi
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r12
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r10
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r14
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r13
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r15
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %rbx
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    subq %rcx, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r8, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %rsi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %rbx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r15, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r13, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %rdx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r14, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r9, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r10, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %r12, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rdi
+; X64-NEXT:    adcq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    adcq $-1, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movabsq $2199023255551, %rbx # imm = 0x1FFFFFFFFFF
+; X64-NEXT:    adcq %rbx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movabsq $2199023255551, %r14 # imm = 0x1FFFFFFFFFF
+; X64-NEXT:    andq %r14, %rax
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rdx, %rcx
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rsi, %rcx
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r12, %rax
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r10, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r11, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r13, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r9, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %rbp, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    orq %r8, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    orq %r14, %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    jne .LBB5_3
+; X64-NEXT:    jmp .LBB5_4
+; X64-NEXT:  .LBB5_1:
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    jmp .LBB5_8
+; X64-NEXT:  .LBB5_6:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:  .LBB5_4: # %udiv-loop-exit
+; X64-NEXT:    shldq $1, %r8, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    orq %rax, %rbp
+; X64-NEXT:    shldq $1, %rsi, %r8
+; X64-NEXT:    orq %rax, %r8
+; X64-NEXT:    shldq $1, %rbx, %rsi
+; X64-NEXT:    orq %rax, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rcx, %rbx
+; X64-NEXT:    orq %rax, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %rdi, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    shldq $1, %r15, %rdi
+; X64-NEXT:    orq %rax, %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rcx, %r15
+; X64-NEXT:    orq %rax, %r15
+; X64-NEXT:    shldq $1, %r13, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    shldq $1, %rsi, %r13
+; X64-NEXT:    orq %rax, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rcx, %rsi
+; X64-NEXT:    orq %rax, %rsi
+; X64-NEXT:    shldq $1, %r9, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    shldq $1, %r10, %r9
+; X64-NEXT:    orq %rax, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rbx, %r10
+; X64-NEXT:    orq %rax, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    shldq $1, %rdx, %rbx
+; X64-NEXT:    orq %rax, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    shldq $1, %r11, %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq %r11, %r11
+; X64-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    movq %rcx, %rdi
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r15, %r8
+; X64-NEXT:    movq %r13, %r14
+; X64-NEXT:    movq %rbx, %r15
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:  .LBB5_8: # %udiv-end
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq %r11, %rsi
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %r8, %rcx
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rcx, %r11
+; X64-NEXT:    adcq %rdi, %r13
+; X64-NEXT:    setb %cl
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    addq %r13, %r12
+; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    adcq %rax, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %rdi, %rcx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbp, %rdi
+; X64-NEXT:    setb %cl
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %rdi, %rsi
+; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    adcq %rax, %r9
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq %r11, %r9
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    movq %r8, %rbx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r15, %r13
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %r8, %rcx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    adcq %rbp, %rbx
+; X64-NEXT:    setb %r8b
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbx, %rdi
+; X64-NEXT:    movzbl %r8b, %eax
+; X64-NEXT:    adcq %rax, %rbp
+; X64-NEXT:    addq %rsi, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r9, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    addq %r12, %rdi
+; X64-NEXT:    adcq %r10, %rbp
+; X64-NEXT:    movq %r15, %r14
+; X64-NEXT:    setb %r8b
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %r10, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    addq %rbx, %rax
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    setb %sil
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movzbl %sil, %ecx
+; X64-NEXT:    adcq %rcx, %rdx
+; X64-NEXT:    addq %rdi, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbp, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movzbl %r8b, %ecx
+; X64-NEXT:    adcq %rcx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rdi, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    addq %rbx, %r12
+; X64-NEXT:    adcq %rsi, %rbp
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rbp, %r9
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rsi, %rbp
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %rdi, %rsi
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    addq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbx, %rdi
+; X64-NEXT:    setb %cl
+; X64-NEXT:    movq %r11, %r13
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %rdi, %rsi
+; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    adcq %rax, %r11
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq %r12, %r11
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    movq %r8, %rbx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rcx, %r12
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %rdi, %rcx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %r14, %rdi
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    adcq %rbp, %rbx
+; X64-NEXT:    setb %r14b
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r13
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbx, %rdi
+; X64-NEXT:    movzbl %r14b, %eax
+; X64-NEXT:    adcq %rax, %rbp
+; X64-NEXT:    addq %rsi, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r11, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    addq %r9, %rdi
+; X64-NEXT:    adcq %r10, %rbp
+; X64-NEXT:    setb %r8b
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r12
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %r12
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    addq %rbx, %r10
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movzbl %bl, %ecx
+; X64-NEXT:    adcq %rcx, %rdx
+; X64-NEXT:    addq %rdi, %r9
+; X64-NEXT:    adcq %rbp, %r10
+; X64-NEXT:    movzbl %r8b, %ecx
+; X64-NEXT:    adcq %rcx, %rax
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq %r15, %r13
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    movq %r11, %rbp
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %r8
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rbx, %r11
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    addq %rcx, %r12
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rcx, %rbp
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    addq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbx, %rdi
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %rdi, %rcx
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r10
+; X64-NEXT:    addq %r14, %rcx
+; X64-NEXT:    adcq %r11, %r10
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    movq %r8, %rbx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %rsi, %r14
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %r11, %rsi
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    adcq %rdi, %rbx
+; X64-NEXT:    setb %r11b
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %r15
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbx, %rdi
+; X64-NEXT:    movzbl %r11b, %eax
+; X64-NEXT:    adcq %rax, %r15
+; X64-NEXT:    addq %rcx, %r8
+; X64-NEXT:    adcq %r10, %rsi
+; X64-NEXT:    movq %rsi, %r14
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    adcq $0, %r15
+; X64-NEXT:    addq %r12, %rdi
+; X64-NEXT:    adcq %r9, %r15
+; X64-NEXT:    setb %r10b
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %r9, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    addq %rbx, %r13
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rcx, %r9
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r11
+; X64-NEXT:    addq %rdi, %r12
+; X64-NEXT:    adcq %r15, %r13
+; X64-NEXT:    movzbl %r10b, %eax
+; X64-NEXT:    adcq %rax, %r9
+; X64-NEXT:    adcq $0, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq $0, %r13
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    adcq $0, %r11
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r15
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %r8, %rbp
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rbp, %r8
+; X64-NEXT:    adcq %rbx, %rcx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    addq %rcx, %r10
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    movq %r15, %rsi
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rcx, %rdi
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbx, %rcx
+; X64-NEXT:    setb %sil
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    addq %rcx, %r12
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    adcq %rax, %rbx
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r8, %rbx
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    adcq $0, %r14
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %rcx, %rsi
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    adcq %rdi, %rbp
+; X64-NEXT:    setb %r13b
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbp, %rdi
+; X64-NEXT:    movzbl %r13b, %eax
+; X64-NEXT:    adcq %rax, %rcx
+; X64-NEXT:    addq %r12, %r8
+; X64-NEXT:    movq %r8, %r15
+; X64-NEXT:    adcq %rbx, %rsi
+; X64-NEXT:    movq %rsi, %r12
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    adcq $0, %rcx
+; X64-NEXT:    addq %r10, %rdi
+; X64-NEXT:    adcq %r14, %rcx
+; X64-NEXT:    setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %r10, %rbx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    addq %rbx, %rax
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    adcq %rbp, %r14
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    addq %r14, %rax
+; X64-NEXT:    movzbl %bl, %esi
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    addq %rdi, %r13
+; X64-NEXT:    adcq %rcx, %r10
+; X64-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X64-NEXT:    adcq %rcx, %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq %r9, %r15
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r11, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT:    adcq %rax, %r13
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    addq %rbx, %r10
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rcx, %r11
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rcx, %rbp
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    addq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbx, %rdi
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %rdi, %rcx
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r14
+; X64-NEXT:    addq %r8, %rcx
+; X64-NEXT:    adcq %r10, %r14
+; X64-NEXT:    adcq $0, %r11
+; X64-NEXT:    adcq $0, %r13
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %rbx, %rsi
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    adcq %rbp, %rbx
+; X64-NEXT:    setb %r15b
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbx, %rdi
+; X64-NEXT:    movzbl %r15b, %eax
+; X64-NEXT:    adcq %rax, %rbp
+; X64-NEXT:    addq %rcx, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r14, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    addq %r11, %rdi
+; X64-NEXT:    adcq %r13, %rbp
+; X64-NEXT:    setb %r9b
+; X64-NEXT:    movq %r12, %r15
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    addq %rbx, %rax
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    setb %sil
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r11, %r13
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    addq %rcx, %r14
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    adcq %rax, %rdx
+; X64-NEXT:    addq %rdi, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbp, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movzbl %r9b, %eax
+; X64-NEXT:    adcq %rax, %r14
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    imulq %rbx, %rsi
+; X64-NEXT:    addq %rdx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    imulq %r9, %rax
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    imulq %rbp, %r10
+; X64-NEXT:    addq %rdx, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    imulq %rsi, %r8
+; X64-NEXT:    addq %r10, %r8
+; X64-NEXT:    addq %r11, %rdi
+; X64-NEXT:    adcq %rcx, %r8
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %rbx, %r10
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rbx, %r9
+; X64-NEXT:    adcq %rbp, %rcx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rcx, %r11
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r10
+; X64-NEXT:    addq %rdi, %r11
+; X64-NEXT:    adcq %r8, %r10
+; X64-NEXT:    movq %r15, %rsi
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    imulq %r12, %rsi
+; X64-NEXT:    addq %rdx, %rsi
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    imulq %rcx, %rax
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    imulq %rbp, %rax
+; X64-NEXT:    addq %rdx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    imulq %rdi, %rsi
+; X64-NEXT:    addq %rax, %rsi
+; X64-NEXT:    addq %r15, %r8
+; X64-NEXT:    adcq %rbx, %rsi
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %r13, %rbp
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r13
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbp, %rdi
+; X64-NEXT:    adcq %rbx, %rcx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movzbl %bl, %ecx
+; X64-NEXT:    adcq %rcx, %rdx
+; X64-NEXT:    addq %r8, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r9, %rdi
+; X64-NEXT:    adcq %r11, %rax
+; X64-NEXT:    adcq %r10, %rdx
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r14, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %rbp
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rbx, %r8
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rcx, %r11
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq %rbp, %rsi
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rcx, %rdi
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbx, %rsi
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %rsi, %rcx
+; X64-NEXT:    movzbl %bl, %eax
+; X64-NEXT:    adcq %rax, %r14
+; X64-NEXT:    addq %r12, %rcx
+; X64-NEXT:    adcq %r8, %r14
+; X64-NEXT:    adcq $0, %r11
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %rbp, %r12
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %r8, %rsi
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    adcq %rdi, %rbx
+; X64-NEXT:    setb %r8b
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %r12
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbx, %rdi
+; X64-NEXT:    movzbl %r8b, %eax
+; X64-NEXT:    adcq %rax, %r12
+; X64-NEXT:    addq %rcx, %r10
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r14, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    addq %r11, %rdi
+; X64-NEXT:    adcq %r9, %r12
+; X64-NEXT:    setb %r14b
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %r8, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    addq %rbx, %rax
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    adcq %rsi, %r8
+; X64-NEXT:    setb %sil
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %r8, %r11
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    adcq %rax, %rdx
+; X64-NEXT:    addq %rdi, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r12, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movzbl %r14b, %eax
+; X64-NEXT:    adcq %rax, %r11
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    imulq %r15, %rdi
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    addq %rdi, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    imulq %rsi, %rcx
+; X64-NEXT:    addq %rdx, %rcx
+; X64-NEXT:    movq %rcx, %r14
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %r9, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    imulq %rdi, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rcx, %rdx
+; X64-NEXT:    imulq %r9, %r13
+; X64-NEXT:    addq %rdx, %r13
+; X64-NEXT:    addq %r10, %r8
+; X64-NEXT:    adcq %r14, %r13
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rcx, %rdi
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %r9
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    addq %rdi, %r14
+; X64-NEXT:    adcq %rbx, %rsi
+; X64-NEXT:    setb %cl
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rdx, %r12
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    addq %rsi, %r15
+; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    adcq %rax, %r12
+; X64-NEXT:    addq %r8, %r15
+; X64-NEXT:    adcq %r13, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    imulq %rax, %rbp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    addq %rbp, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    imulq %r8, %rcx
+; X64-NEXT:    addq %rdx, %rcx
+; X64-NEXT:    movq %rcx, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    imulq %r9, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    addq %rcx, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    imulq %rbp, %rax
+; X64-NEXT:    addq %rdx, %rax
+; X64-NEXT:    addq %r13, %r10
+; X64-NEXT:    adcq %rbx, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbx, %rdi
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %r8, %rbx
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rdi, %r8
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    setb %sil
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movzbl %sil, %ecx
+; X64-NEXT:    adcq %rcx, %rdx
+; X64-NEXT:    addq %r10, %rax
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r14, %r8
+; X64-NEXT:    adcq %r15, %rax
+; X64-NEXT:    adcq %r12, %rdx
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r11, %rax
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    subq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    movq (%rsp), %r14 # 8-byte Reload
+; X64-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    sbbq %r9, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    sbbq %rsi, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    sbbq %rdi, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    sbbq %rbp, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    sbbq %r13, %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    sbbq %r8, %r13
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    sbbq %rdx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    xorq %rax, %rsi
+; X64-NEXT:    xorq %rax, %r8
+; X64-NEXT:    xorq %rax, %r13
+; X64-NEXT:    xorq %rax, %rdi
+; X64-NEXT:    xorq %rax, %r9
+; X64-NEXT:    xorq %rax, %r12
+; X64-NEXT:    xorq %rax, %r10
+; X64-NEXT:    xorq %rax, %r11
+; X64-NEXT:    xorq %rax, %r14
+; X64-NEXT:    xorq %rax, %r15
+; X64-NEXT:    xorq %rax, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    xorq %rax, %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    xorq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    xorq %rax, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    xorq %rax, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    xorq %rax, %rdx
+; X64-NEXT:    subq %rax, %rdx
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, %rbx
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rbx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    sbbq %rax, %rcx
+; X64-NEXT:    sbbq %rax, %r15
+; X64-NEXT:    sbbq %rax, %r14
+; X64-NEXT:    sbbq %rax, %r11
+; X64-NEXT:    sbbq %rax, %r10
+; X64-NEXT:    sbbq %rax, %r12
+; X64-NEXT:    sbbq %rax, %r9
+; X64-NEXT:    sbbq %rax, %rdi
+; X64-NEXT:    sbbq %rax, %r13
+; X64-NEXT:    sbbq %rax, %r8
+; X64-NEXT:    sbbq %rax, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, (%rax)
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, 8(%rax)
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, 16(%rax)
+; X64-NEXT:    movq %rbx, 24(%rax)
+; X64-NEXT:    movq %rdx, 32(%rax)
+; X64-NEXT:    movq %rcx, 40(%rax)
+; X64-NEXT:    movq %r15, 48(%rax)
+; X64-NEXT:    movq %r14, 56(%rax)
+; X64-NEXT:    movq %r11, 64(%rax)
+; X64-NEXT:    movq %r10, 72(%rax)
+; X64-NEXT:    movq %r12, 80(%rax)
+; X64-NEXT:    movq %r9, 88(%rax)
+; X64-NEXT:    movq %rdi, 96(%rax)
+; X64-NEXT:    movq %r13, 104(%rax)
+; X64-NEXT:    movq %r8, 112(%rax)
+; X64-NEXT:    movl %esi, 120(%rax)
+; X64-NEXT:    shrq $32, %rsi
+; X64-NEXT:    andl $511, %esi # imm = 0x1FF
+; X64-NEXT:    movw %si, 124(%rax)
+; X64-NEXT:    addq $1160, %rsp # imm = 0x488
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %res = srem i1001 %a, %b
+  ret i1001 %res
+}
+
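+; chain129 appears to feed the result of one i129 division into a second one
+; (the fused block names like "_udiv-special-cases_udiv-special-cases" in the
+; generated checks below come from expanding both operations in sequence). At
+; 129 bits the operands are just past the widest compiler-rt division helpers
+; (__udivti3 and friends stop at 128 bits), so the whole sequence has to be
+; expanded inline.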
+define i129 @chain129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: chain129:
+; X86:       # %bb.0: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $92, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ebp, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    testl %ebp, %ebp
+; X86-NEXT:    jne .LBB6_1
+; X86-NEXT:  # %bb.2: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    bsrl %esi, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    jmp .LBB6_3
+; X86-NEXT:  .LBB6_1:
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    bsrl %ebp, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:  .LBB6_3: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB6_4
+; X86-NEXT:  # %bb.5: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    jmp .LBB6_6
+; X86-NEXT:  .LBB6_4:
+; X86-NEXT:    bsrl %ecx, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:  .LBB6_6: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    jne .LBB6_8
+; X86-NEXT:  # %bb.7: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    addl $64, %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:  .LBB6_8: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    jne .LBB6_9
+; X86-NEXT:  # %bb.10: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    subl $-128, %ebx
+; X86-NEXT:    jmp .LBB6_11
+; X86-NEXT:  .LBB6_9:
+; X86-NEXT:    bsrl %edx, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    addl $96, %ebx
+; X86-NEXT:  .LBB6_11: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl $127, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB6_12
+; X86-NEXT:  # %bb.13: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    bsrl %ebx, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB6_14
+; X86-NEXT:  .LBB6_12:
+; X86-NEXT:    bsrl %eax, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:  .LBB6_14: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB6_15
+; X86-NEXT:  # %bb.16: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    jmp .LBB6_17
+; X86-NEXT:  .LBB6_15:
+; X86-NEXT:    bsrl %edi, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:  .LBB6_17: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    jne .LBB6_19
+; X86-NEXT:  # %bb.18: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    addl $64, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB6_19: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    orb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB6_20
+; X86-NEXT:  # %bb.21: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    subl $-128, %eax
+; X86-NEXT:    jmp .LBB6_22
+; X86-NEXT:  .LBB6_20:
+; X86-NEXT:    bsrl %ecx, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $96, %eax
+; X86-NEXT:  .LBB6_22: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    subl $127, %eax
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    subl %eax, %edx
+; X86-NEXT:    sbbl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl (%esp), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl $128, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl %edx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    setb %al
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    orb %al, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    jne .LBB6_24
+; X86-NEXT:  # %bb.23: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB6_24: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    andl $1, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    jne .LBB6_26
+; X86-NEXT:  # %bb.25: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_26: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    jne .LBB6_27
+; X86-NEXT:  # %bb.139: # %_udiv-special-cases_udiv-special-cases
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    xorl $128, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    je .LBB6_140
+; X86-NEXT:  # %bb.95: # %udiv-bb15
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, (%esp) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB6_97
+; X86-NEXT:  # %bb.96: # %udiv-bb15
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB6_97: # %udiv-bb15
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $-128, %cl
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shldl %cl, %ebx, %edx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB6_99
+; X86-NEXT:  # %bb.98:
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:  .LBB6_99: # %udiv-bb15
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    subb $64, %ch
+; X86-NEXT:    jb .LBB6_101
+; X86-NEXT:  # %bb.100: # %udiv-bb15
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:  .LBB6_101: # %udiv-bb15
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movb (%esp), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB6_103
+; X86-NEXT:  # %bb.102: # %udiv-bb15
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_103: # %udiv-bb15
+; X86-NEXT:    negb %ch
+; X86-NEXT:    movl %ebx, %edx
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB6_105
+; X86-NEXT:  # %bb.104: # %udiv-bb15
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:  .LBB6_105: # %udiv-bb15
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB6_107
+; X86-NEXT:  # %bb.106:
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_107: # %udiv-bb15
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB6_109
+; X86-NEXT:  # %bb.108: # %udiv-bb15
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB6_109: # %udiv-bb15
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB6_111
+; X86-NEXT:  # %bb.110:
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_111: # %udiv-bb15
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB6_113
+; X86-NEXT:  # %bb.112: # %udiv-bb15
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB6_113: # %udiv-bb15
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB6_115
+; X86-NEXT:  # %bb.114: # %udiv-bb15
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB6_115: # %udiv-bb15
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jae .LBB6_117
+; X86-NEXT:  # %bb.116:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB6_117: # %udiv-bb15
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; X86-NEXT:    testb %dl, %dl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    je .LBB6_119
+; X86-NEXT:  # %bb.118: # %udiv-bb15
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB6_119: # %udiv-bb15
+; X86-NEXT:    negb %dl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB6_121
+; X86-NEXT:  # %bb.120: # %udiv-bb15
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB6_121: # %udiv-bb15
+; X86-NEXT:    cmpb $64, %dl
+; X86-NEXT:    jb .LBB6_123
+; X86-NEXT:  # %bb.122: # %udiv-bb15
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:  .LBB6_123: # %udiv-bb15
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    orl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jns .LBB6_125
+; X86-NEXT:  # %bb.124: # %udiv-bb15
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:  .LBB6_125: # %udiv-bb15
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB6_126
+; X86-NEXT:  # %bb.127: # %udiv-bb15
+; X86-NEXT:    js .LBB6_128
+; X86-NEXT:  .LBB6_129: # %udiv-bb15
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    jne .LBB6_130
+; X86-NEXT:  .LBB6_131: # %udiv-bb15
+; X86-NEXT:    js .LBB6_132
+; X86-NEXT:  .LBB6_133: # %udiv-bb15
+; X86-NEXT:    jne .LBB6_134
+; X86-NEXT:  .LBB6_135: # %udiv-bb15
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    jns .LBB6_137
+; X86-NEXT:  .LBB6_136: # %udiv-bb15
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB6_137: # %udiv-bb15
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB6_138
+; X86-NEXT:  # %bb.30: # %udiv-preheader4
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB6_31
+; X86-NEXT:  # %bb.32: # %udiv-preheader4
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB6_33
+; X86-NEXT:  .LBB6_27:
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    jmp .LBB6_140
+; X86-NEXT:  .LBB6_126: # %udiv-bb15
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jns .LBB6_129
+; X86-NEXT:  .LBB6_128: # %udiv-bb15
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    je .LBB6_131
+; X86-NEXT:  .LBB6_130: # %udiv-bb15
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jns .LBB6_133
+; X86-NEXT:  .LBB6_132: # %udiv-bb15
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB6_135
+; X86-NEXT:  .LBB6_134: # %udiv-bb15
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    js .LBB6_136
+; X86-NEXT:    jmp .LBB6_137
+; X86-NEXT:  .LBB6_138:
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jmp .LBB6_29
+; X86-NEXT:  .LBB6_31:
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB6_33: # %udiv-preheader4
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, %ch
+; X86-NEXT:    subb $64, %ch
+; X86-NEXT:    jb .LBB6_35
+; X86-NEXT:  # %bb.34: # %udiv-preheader4
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB6_35: # %udiv-preheader4
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negb %ch
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    jne .LBB6_37
+; X86-NEXT:  # %bb.36: # %udiv-preheader4
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB6_37: # %udiv-preheader4
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    addb $-64, %cl
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jne .LBB6_39
+; X86-NEXT:  # %bb.38: # %udiv-preheader4
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:  .LBB6_39: # %udiv-preheader4
+; X86-NEXT:    cmpb $64, %al
+; X86-NEXT:    jb .LBB6_40
+; X86-NEXT:  # %bb.41: # %udiv-preheader4
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB6_42
+; X86-NEXT:  .LBB6_40:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:  .LBB6_42: # %udiv-preheader4
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jne .LBB6_44
+; X86-NEXT:  # %bb.43: # %udiv-preheader4
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_44: # %udiv-preheader4
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    shrdl %cl, %ebp, %edi
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    jne .LBB6_46
+; X86-NEXT:  # %bb.45: # %udiv-preheader4
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB6_46: # %udiv-preheader4
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shrdl %cl, %edx, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB6_48
+; X86-NEXT:  # %bb.47: # %udiv-preheader4
+; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT:  .LBB6_48: # %udiv-preheader4
+; X86-NEXT:    cmpb $64, %al
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB6_50
+; X86-NEXT:  # %bb.49:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:  .LBB6_50: # %udiv-preheader4
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB6_52
+; X86-NEXT:  # %bb.51: # %udiv-preheader4
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB6_52: # %udiv-preheader4
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB6_54
+; X86-NEXT:  # %bb.53: # %udiv-preheader4
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB6_54: # %udiv-preheader4
+; X86-NEXT:    movb $-128, %ch
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jb .LBB6_56
+; X86-NEXT:  # %bb.55: # %udiv-preheader4
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:  .LBB6_56: # %udiv-preheader4
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    je .LBB6_58
+; X86-NEXT:  # %bb.57:
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB6_58: # %udiv-preheader4
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    jb .LBB6_60
+; X86-NEXT:  # %bb.59: # %udiv-preheader4
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB6_60: # %udiv-preheader4
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB6_62
+; X86-NEXT:  # %bb.61: # %udiv-preheader4
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB6_62: # %udiv-preheader4
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    jb .LBB6_64
+; X86-NEXT:  # %bb.63: # %udiv-preheader4
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB6_64: # %udiv-preheader4
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB6_66
+; X86-NEXT:  # %bb.65: # %udiv-preheader4
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:  .LBB6_66: # %udiv-preheader4
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    je .LBB6_68
+; X86-NEXT:  # %bb.67: # %udiv-preheader4
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB6_68: # %udiv-preheader4
+; X86-NEXT:    orl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    jne .LBB6_69
+; X86-NEXT:  # %bb.70: # %udiv-preheader4
+; X86-NEXT:    js .LBB6_71
+; X86-NEXT:  .LBB6_72: # %udiv-preheader4
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB6_74
+; X86-NEXT:  .LBB6_73: # %udiv-preheader4
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_74: # %udiv-preheader4
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    addb $-128, %cl
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB6_76
+; X86-NEXT:  # %bb.75: # %udiv-preheader4
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB6_76: # %udiv-preheader4
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jb .LBB6_78
+; X86-NEXT:  # %bb.77: # %udiv-preheader4
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB6_78: # %udiv-preheader4
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    je .LBB6_80
+; X86-NEXT:  # %bb.79: # %udiv-preheader4
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:  .LBB6_80: # %udiv-preheader4
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jns .LBB6_82
+; X86-NEXT:  # %bb.81: # %udiv-preheader4
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB6_82: # %udiv-preheader4
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    je .LBB6_84
+; X86-NEXT:  # %bb.83: # %udiv-preheader4
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:  .LBB6_84: # %udiv-preheader4
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    je .LBB6_86
+; X86-NEXT:  # %bb.85: # %udiv-preheader4
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB6_86: # %udiv-preheader4
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    jns .LBB6_88
+; X86-NEXT:  # %bb.87: # %udiv-preheader4
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:  .LBB6_88: # %udiv-preheader4
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    je .LBB6_90
+; X86-NEXT:  # %bb.89: # %udiv-preheader4
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_90: # %udiv-preheader4
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edi
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jns .LBB6_92
+; X86-NEXT:  # %bb.91: # %udiv-preheader4
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:  .LBB6_92: # %udiv-preheader4
+; X86-NEXT:    je .LBB6_94
+; X86-NEXT:  # %bb.93: # %udiv-preheader4
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:  .LBB6_94: # %udiv-preheader4
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    addl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $1, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB6_28: # %udiv-do-while3
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    shrl $31, %edx
+; X86-NEXT:    andl $1, %ebp
+; X86-NEXT:    leal (%ebp,%edi,2), %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebp
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $1, %ebx, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl $1, %eax, %ebx
+; X86-NEXT:    orl %ecx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl $31, %esi
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    andl $1, %ebp
+; X86-NEXT:    cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl %ebx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    addl $-1, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $1, %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB6_28
+; X86-NEXT:  .LBB6_29: # %udiv-loop-exit2
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebp, %ecx
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    shldl $1, %ebx, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %ebx
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    shrl $31, %edx
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    addl %edi, %edi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:  .LBB6_140: # %udiv-end1
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    negl %edi
+; X86-NEXT:    xorl %edi, %edx
+; X86-NEXT:    xorl %edi, %ecx
+; X86-NEXT:    xorl %edi, %ebp
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    xorl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    xorl %edi, %ebx
+; X86-NEXT:    subl %edi, %ebx
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    sbbl %edi, %ebp
+; X86-NEXT:    sbbl %edi, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    sbbl %edi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebp, %edx
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    sete %dl
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB6_141
+; X86-NEXT:  # %bb.142: # %udiv-end1
+; X86-NEXT:    bsrl %ecx, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB6_143
+; X86-NEXT:  .LBB6_141:
+; X86-NEXT:    bsrl %eax, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:  .LBB6_143: # %udiv-end1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    testl %ebp, %ebp
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    jne .LBB6_144
+; X86-NEXT:  # %bb.145: # %udiv-end1
+; X86-NEXT:    bsrl %ebx, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    addl $32, %esi
+; X86-NEXT:    jmp .LBB6_146
+; X86-NEXT:  .LBB6_144:
+; X86-NEXT:    bsrl %ebp, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:  .LBB6_146: # %udiv-end1
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    orl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    jne .LBB6_148
+; X86-NEXT:  # %bb.147: # %udiv-end1
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB6_148: # %udiv-end1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB6_149
+; X86-NEXT:  # %bb.150: # %udiv-end1
+; X86-NEXT:    subl $-128, %eax
+; X86-NEXT:    jmp .LBB6_151
+; X86-NEXT:  .LBB6_149:
+; X86-NEXT:    bsrl %ecx, %eax
+; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    addl $96, %eax
+; X86-NEXT:  .LBB6_151: # %udiv-end1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    subl $127, %eax
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $124, %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebp, %ebx
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %edi, %ebp
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl $128, %edx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl %ecx, %edx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %ebx, %edx
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %eax, %edx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %ebp, %edx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    setb %al
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; X86-NEXT:    orb %al, %dl
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB6_153
+; X86-NEXT:  # %bb.152: # %udiv-end1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB6_153: # %udiv-end1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %dl, %dl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB6_155
+; X86-NEXT:  # %bb.154: # %udiv-end1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:  .LBB6_155: # %udiv-end1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB6_203
+; X86-NEXT:  # %bb.156: # %udiv-end1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    xorl $128, %edx
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    je .LBB6_203
+; X86-NEXT:  # %bb.157: # %udiv-bb1
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB6_159
+; X86-NEXT:  # %bb.158: # %udiv-bb1
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:  .LBB6_159: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb $-128, %cl
+; X86-NEXT:    subb %bl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebp, %edx
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %ebp
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    je .LBB6_161
+; X86-NEXT:  # %bb.160:
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB6_161: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X86-NEXT:    subb $64, %ch
+; X86-NEXT:    jb .LBB6_163
+; X86-NEXT:  # %bb.162: # %udiv-bb1
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB6_163: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB6_165
+; X86-NEXT:  # %bb.164: # %udiv-bb1
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_165: # %udiv-bb1
+; X86-NEXT:    negb %ch
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB6_167
+; X86-NEXT:  # %bb.166: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB6_167: # %udiv-bb1
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jae .LBB6_169
+; X86-NEXT:  # %bb.168:
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_169: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %esi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB6_171
+; X86-NEXT:  # %bb.170: # %udiv-bb1
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:  .LBB6_171: # %udiv-bb1
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    jae .LBB6_173
+; X86-NEXT:  # %bb.172:
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_173: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %edx, %ebx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB6_175
+; X86-NEXT:  # %bb.174: # %udiv-bb1
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB6_175: # %udiv-bb1
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %ebx, %ebp
+; X86-NEXT:    shrl %cl, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB6_177
+; X86-NEXT:  # %bb.176: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:  .LBB6_177: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jae .LBB6_179
+; X86-NEXT:  # %bb.178:
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:  .LBB6_179: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    je .LBB6_181
+; X86-NEXT:  # %bb.180: # %udiv-bb1
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:  .LBB6_181: # %udiv-bb1
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB6_183
+; X86-NEXT:  # %bb.182: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:  .LBB6_183: # %udiv-bb1
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jb .LBB6_185
+; X86-NEXT:  # %bb.184: # %udiv-bb1
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:  .LBB6_185: # %udiv-bb1
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    js .LBB6_186
+; X86-NEXT:  # %bb.187: # %udiv-bb1
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    jne .LBB6_188
+; X86-NEXT:  .LBB6_189: # %udiv-bb1
+; X86-NEXT:    js .LBB6_190
+; X86-NEXT:  .LBB6_191: # %udiv-bb1
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    jne .LBB6_192
+; X86-NEXT:  .LBB6_193: # %udiv-bb1
+; X86-NEXT:    js .LBB6_194
+; X86-NEXT:  .LBB6_195: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    je .LBB6_197
+; X86-NEXT:  .LBB6_196: # %udiv-bb1
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB6_197: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jns .LBB6_199
+; X86-NEXT:  # %bb.198: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB6_199: # %udiv-bb1
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    je .LBB6_200
+; X86-NEXT:  # %bb.204: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl (%esp), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %dl
+; X86-NEXT:    jne .LBB6_205
+; X86-NEXT:  # %bb.206: # %udiv-preheader
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jmp .LBB6_207
+; X86-NEXT:  .LBB6_186: # %udiv-bb1
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    je .LBB6_189
+; X86-NEXT:  .LBB6_188: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    jns .LBB6_191
+; X86-NEXT:  .LBB6_190: # %udiv-bb1
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    je .LBB6_193
+; X86-NEXT:  .LBB6_192: # %udiv-bb1
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jns .LBB6_195
+; X86-NEXT:  .LBB6_194: # %udiv-bb1
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB6_196
+; X86-NEXT:    jmp .LBB6_197
+; X86-NEXT:  .LBB6_200:
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB6_202
+; X86-NEXT:  .LBB6_69: # %udiv-preheader4
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    jns .LBB6_72
+; X86-NEXT:  .LBB6_71: # %udiv-preheader4
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB6_73
+; X86-NEXT:    jmp .LBB6_74
+; X86-NEXT:  .LBB6_205:
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB6_207: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    jb .LBB6_209
+; X86-NEXT:  # %bb.208: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:  .LBB6_209: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    jne .LBB6_211
+; X86-NEXT:  # %bb.210: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:  .LBB6_211: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movb %al, %bh
+; X86-NEXT:    addb $-64, %bh
+; X86-NEXT:    movb %bh, %cl
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bh
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    jne .LBB6_213
+; X86-NEXT:  # %bb.212: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:  .LBB6_213: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %al
+; X86-NEXT:    jb .LBB6_214
+; X86-NEXT:  # %bb.215: # %udiv-preheader
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    jmp .LBB6_216
+; X86-NEXT:  .LBB6_214:
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:  .LBB6_216: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %bl
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    jne .LBB6_218
+; X86-NEXT:  # %bb.217: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:  .LBB6_218: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB6_220
+; X86-NEXT:  # %bb.219: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_220: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb %bh, %cl
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %bh
+; X86-NEXT:    jne .LBB6_222
+; X86-NEXT:  # %bb.221: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_222: # %udiv-preheader
+; X86-NEXT:    cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jae .LBB6_224
+; X86-NEXT:  # %bb.223:
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_224: # %udiv-preheader
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    jne .LBB6_226
+; X86-NEXT:  # %bb.225: # %udiv-preheader
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:  .LBB6_226: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    shldl %cl, %esi, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB6_228
+; X86-NEXT:  # %bb.227: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB6_228: # %udiv-preheader
+; X86-NEXT:    movb $-128, %ch
+; X86-NEXT:    subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    jb .LBB6_230
+; X86-NEXT:  # %bb.229: # %udiv-preheader
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB6_230: # %udiv-preheader
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shldl %cl, %esi, %ebp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    je .LBB6_232
+; X86-NEXT:  # %bb.231:
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB6_232: # %udiv-preheader
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    subb $64, %cl
+; X86-NEXT:    jb .LBB6_234
+; X86-NEXT:  # %bb.233: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB6_234: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    negb %cl
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shrdl %cl, %esi, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    jne .LBB6_236
+; X86-NEXT:  # %bb.235: # %udiv-preheader
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:  .LBB6_236: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %ch
+; X86-NEXT:    jb .LBB6_238
+; X86-NEXT:  # %bb.237: # %udiv-preheader
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:  .LBB6_238: # %udiv-preheader
+; X86-NEXT:    andl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    je .LBB6_240
+; X86-NEXT:  # %bb.239: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB6_240: # %udiv-preheader
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    je .LBB6_242
+; X86-NEXT:  # %bb.241: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB6_242: # %udiv-preheader
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    je .LBB6_244
+; X86-NEXT:  # %bb.243: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB6_244: # %udiv-preheader
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    jns .LBB6_246
+; X86-NEXT:  # %bb.245: # %udiv-preheader
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:  .LBB6_246: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    je .LBB6_248
+; X86-NEXT:  # %bb.247: # %udiv-preheader
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:  .LBB6_248: # %udiv-preheader
+; X86-NEXT:    movb %al, %cl
+; X86-NEXT:    addb $-128, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    shrdl %cl, %eax, %ebp
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    jne .LBB6_250
+; X86-NEXT:  # %bb.249: # %udiv-preheader
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:  .LBB6_250: # %udiv-preheader
+; X86-NEXT:    cmpb $64, %cl
+; X86-NEXT:    jb .LBB6_252
+; X86-NEXT:  # %bb.251: # %udiv-preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB6_252: # %udiv-preheader
+; X86-NEXT:    testb %cl, %cl
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    je .LBB6_254
+; X86-NEXT:  # %bb.253: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_254: # %udiv-preheader
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jns .LBB6_256
+; X86-NEXT:  # %bb.255: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:  .LBB6_256: # %udiv-preheader
+; X86-NEXT:    je .LBB6_258
+; X86-NEXT:  # %bb.257: # %udiv-preheader
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:  .LBB6_258: # %udiv-preheader
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    je .LBB6_260
+; X86-NEXT:  # %bb.259: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:  .LBB6_260: # %udiv-preheader
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    jns .LBB6_262
+; X86-NEXT:  # %bb.261: # %udiv-preheader
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:  .LBB6_262: # %udiv-preheader
+; X86-NEXT:    je .LBB6_264
+; X86-NEXT:  # %bb.263: # %udiv-preheader
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_264: # %udiv-preheader
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    jns .LBB6_266
+; X86-NEXT:  # %bb.265: # %udiv-preheader
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:  .LBB6_266: # %udiv-preheader
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB6_268
+; X86-NEXT:  # %bb.267: # %udiv-preheader
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:  .LBB6_268: # %udiv-preheader
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB6_201: # %udiv-do-while
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %ecx
+; X86-NEXT:    shrl $31, %edx
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    leal (%eax,%edi,2), %edi
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %esi
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ebx
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shldl $1, %ebx, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shrl $31, %ebp
+; X86-NEXT:    orl %eax, %ebp
+; X86-NEXT:    addl %ebx, %ebx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    movl $16, %ebx
+; X86-NEXT:    cmpl %edi, %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %ebx, %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $17, %edx
+; X86-NEXT:    subl %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    sbbl $0, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sbbl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    addl $-1, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ebx
+; X86-NEXT:    adcl $1, %edi
+; X86-NEXT:    andl $1, %edi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %ebx
+; X86-NEXT:    orl %edx, %ebx
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    jne .LBB6_201
+; X86-NEXT:  .LBB6_202: # %udiv-loop-exit
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    shldl $1, %edx, %ecx
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %edi
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    shrl $31, %ebx
+; X86-NEXT:    orl %eax, %ebx
+; X86-NEXT:    addl %esi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:  .LBB6_203: # %udiv-end
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    xorl %edx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    xorl %edx, %ebx
+; X86-NEXT:    xorl %edx, %edi
+; X86-NEXT:    xorl %edx, %esi
+; X86-NEXT:    subl %edx, %esi
+; X86-NEXT:    sbbl %edx, %edi
+; X86-NEXT:    sbbl %edx, %ebx
+; X86-NEXT:    sbbl %edx, %eax
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, (%ebp)
+; X86-NEXT:    movl %edi, 4(%ebp)
+; X86-NEXT:    movl %ebx, 8(%ebp)
+; X86-NEXT:    movl %eax, 12(%ebp)
+; X86-NEXT:    andl $1, %ecx
+; X86-NEXT:    movb %cl, 16(%ebp)
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    addl $92, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: chain129:
+; X64:       # %bb.0: # %_udiv-special-cases_udiv-special-cases
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movl %r11d, %ebp
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    movl %r9d, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    orq %r8, %rdx
+; X64-NEXT:    sete %r12b
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    orq %rbp, %rdx
+; X64-NEXT:    orq %rsi, %rdx
+; X64-NEXT:    sete %r10b
+; X64-NEXT:    bsrq %r8, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    bsrq %rcx, %rbx
+; X64-NEXT:    xorq $63, %rbx
+; X64-NEXT:    addq $64, %rbx
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    testq %r8, %r8
+; X64-NEXT:    cmovneq %rdx, %rbx
+; X64-NEXT:    subq $-128, %rbx
+; X64-NEXT:    bsrq %rax, %r8
+; X64-NEXT:    xorq $63, %r8
+; X64-NEXT:    addq $64, %r8
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    cmoveq %rbx, %r8
+; X64-NEXT:    xorl %r15d, %r15d
+; X64-NEXT:    subq $127, %r8
+; X64-NEXT:    movl $0, %r13d
+; X64-NEXT:    sbbq %r13, %r13
+; X64-NEXT:    movl $0, %ebx
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    movl $0, %r14d
+; X64-NEXT:    sbbq %r14, %r14
+; X64-NEXT:    bsrq %rsi, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    bsrq %rdi, %rcx
+; X64-NEXT:    xorq $63, %rcx
+; X64-NEXT:    addq $64, %rcx
+; X64-NEXT:    testq %rsi, %rsi
+; X64-NEXT:    cmovneq %rdx, %rcx
+; X64-NEXT:    subq $-128, %rcx
+; X64-NEXT:    bsrq %rbp, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    addq $64, %rdx
+; X64-NEXT:    testq %rbp, %rbp
+; X64-NEXT:    cmoveq %rcx, %rdx
+; X64-NEXT:    subq $127, %rdx
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    subq %rdx, %r8
+; X64-NEXT:    sbbq %rax, %r13
+; X64-NEXT:    sbbq %rbp, %rbx
+; X64-NEXT:    sbbq %rcx, %r14
+; X64-NEXT:    movl $128, %eax
+; X64-NEXT:    cmpq %r8, %rax
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r13, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rbx, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %r14, %rcx
+; X64-NEXT:    setb %al
+; X64-NEXT:    orb %r10b, %al
+; X64-NEXT:    orb %r12b, %al
+; X64-NEXT:    movq %r11, %rcx
+; X64-NEXT:    cmovneq %r15, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    movq %rsi, %r14
+; X64-NEXT:    cmovneq %r15, %r14
+; X64-NEXT:    cmoveq %rdi, %r15
+; X64-NEXT:    jne .LBB6_6
+; X64-NEXT:  # %bb.1: # %_udiv-special-cases_udiv-special-cases
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    xorq $128, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    orq %r13, %rax
+; X64-NEXT:    je .LBB6_6
+; X64-NEXT:  # %bb.2: # %udiv-bb15
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %r8d, %r12d
+; X64-NEXT:    movb $-128, %r14b
+; X64-NEXT:    subb %r8b, %r14b
+; X64-NEXT:    movq %r11, %rdx
+; X64-NEXT:    movl %r14d, %ecx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    shlq %cl, %rbp
+; X64-NEXT:    xorl %r10d, %r10d
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    movq %rbp, %r15
+; X64-NEXT:    cmovneq %r10, %r15
+; X64-NEXT:    cmovneq %r10, %rdx
+; X64-NEXT:    movq %rsi, %r9
+; X64-NEXT:    shldq %cl, %rdi, %rsi
+; X64-NEXT:    testb $64, %r14b
+; X64-NEXT:    cmovneq %rbp, %rsi
+; X64-NEXT:    addq $1, %r8
+; X64-NEXT:    adcq $0, %r13
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shrdq %cl, %r9, %rbp
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmoveq %rbp, %rax
+; X64-NEXT:    orl %eax, %edx
+; X64-NEXT:    negb %r12b
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movl %r12d, %ecx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    testb $64, %r12b
+; X64-NEXT:    cmovneq %r10, %rax
+; X64-NEXT:    testb %r14b, %r14b
+; X64-NEXT:    cmovsq %r10, %rsi
+; X64-NEXT:    cmovsq %rax, %rdx
+; X64-NEXT:    cmoveq %r11, %rdx
+; X64-NEXT:    cmovsq %r10, %r15
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    orq %r13, %rax
+; X64-NEXT:    je .LBB6_13
+; X64-NEXT:  # %bb.3: # %udiv-preheader4
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrdq %cl, %r9, %r10
+; X64-NEXT:    movq %r9, %rbp
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    xorl %r12d, %r12d
+; X64-NEXT:    testb $64, %r8b
+; X64-NEXT:    cmovneq %rbp, %r10
+; X64-NEXT:    cmovneq %r12, %rbp
+; X64-NEXT:    andl $1, %r11d
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r8b, %cl
+; X64-NEXT:    xorl %r14d, %r14d
+; X64-NEXT:    shldq %cl, %r11, %r14
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rax, %r14
+; X64-NEXT:    cmovneq %r12, %rax
+; X64-NEXT:    orq %r10, %rax
+; X64-NEXT:    orq %rbp, %r14
+; X64-NEXT:    leal -128(%r8), %ecx
+; X64-NEXT:    shrdq %cl, %r12, %r11
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %r12, %r11
+; X64-NEXT:    testb %r8b, %r8b
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    cmovsq %r12, %r14
+; X64-NEXT:    cmoveq %r9, %r14
+; X64-NEXT:    cmovnsq %rax, %r11
+; X64-NEXT:    cmoveq %rdi, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    addq $-1, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    adcq $-1, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    adcq $1, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    xorl %r10d, %r10d
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    movq %r14, %rdi
+; X64-NEXT:    movq %rsi, %r9
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB6_4: # %udiv-do-while3
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    shldq $1, %r11, %rdi
+; X64-NEXT:    shrq $63, %r14
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    leaq (%rdx,%r11,2), %r11
+; X64-NEXT:    shldq $1, %r15, %r9
+; X64-NEXT:    orq %rax, %r9
+; X64-NEXT:    shrq $63, %rsi
+; X64-NEXT:    addq %r15, %r15
+; X64-NEXT:    orq %r10, %r15
+; X64-NEXT:    orl %esi, %ebp
+; X64-NEXT:    movl %ebp, %edx
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    cmpq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Reload
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    sbbq %rdi, %rax
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    sbbq %r14, %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    movl %eax, %r10d
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    subq %rax, %r11
+; X64-NEXT:    sbbq %rsi, %rdi
+; X64-NEXT:    addq $-1, %r8
+; X64-NEXT:    adcq $-1, %r13
+; X64-NEXT:    adcq $1, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    orq %rbx, %rax
+; X64-NEXT:    orq %r13, %rax
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    movl $0, %ebp
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    movq %r9, %rsi
+; X64-NEXT:    jne .LBB6_4
+; X64-NEXT:    jmp .LBB6_5
+; X64-NEXT:  .LBB6_13:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %r9
+; X64-NEXT:  .LBB6_5: # %udiv-loop-exit2
+; X64-NEXT:    movq %r9, %r14
+; X64-NEXT:    shldq $1, %r15, %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    orq %rcx, %r14
+; X64-NEXT:    shrq $63, %r9
+; X64-NEXT:    addq %r15, %r15
+; X64-NEXT:    orq %r10, %r15
+; X64-NEXT:    orl %r9d, %ecx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:  .LBB6_6: # %udiv-end1
+; X64-NEXT:    movl %ecx, %r11d
+; X64-NEXT:    andl $1, %r11d
+; X64-NEXT:    movq %r11, %rbp
+; X64-NEXT:    negq %rbp
+; X64-NEXT:    xorq %rbp, %rcx
+; X64-NEXT:    xorq %rbp, %r14
+; X64-NEXT:    xorq %rbp, %r15
+; X64-NEXT:    subq %rbp, %r15
+; X64-NEXT:    sbbq %rbp, %r14
+; X64-NEXT:    sbbq %rbp, %rcx
+; X64-NEXT:    movl %ecx, %r8d
+; X64-NEXT:    andl $1, %r8d
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    orq %r8, %rax
+; X64-NEXT:    orq %r14, %rax
+; X64-NEXT:    sete %r10b
+; X64-NEXT:    bsrq %r14, %rax
+; X64-NEXT:    xorq $63, %rax
+; X64-NEXT:    bsrq %r15, %rdx
+; X64-NEXT:    xorq $63, %rdx
+; X64-NEXT:    addq $64, %rdx
+; X64-NEXT:    testq %r14, %r14
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    subq $-128, %rdx
+; X64-NEXT:    bsrq %r8, %rsi
+; X64-NEXT:    xorq $63, %rsi
+; X64-NEXT:    addq $64, %rsi
+; X64-NEXT:    testq %r8, %r8
+; X64-NEXT:    cmoveq %rdx, %rsi
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    subq $127, %rsi
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    sbbq %rdx, %rdx
+; X64-NEXT:    movl $0, %edi
+; X64-NEXT:    sbbq %rdi, %rdi
+; X64-NEXT:    movl $0, %ebx
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    movl $124, %r13d
+; X64-NEXT:    subq %rsi, %r13
+; X64-NEXT:    movl $0, %r12d
+; X64-NEXT:    sbbq %rdx, %r12
+; X64-NEXT:    movl $0, %r9d
+; X64-NEXT:    sbbq %rdi, %r9
+; X64-NEXT:    movl $0, %edx
+; X64-NEXT:    sbbq %rbx, %rdx
+; X64-NEXT:    movl $128, %esi
+; X64-NEXT:    cmpq %r13, %rsi
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    sbbq %r12, %rsi
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    sbbq %r9, %rsi
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    sbbq %rdx, %rsi
+; X64-NEXT:    setb %dl
+; X64-NEXT:    orb %r10b, %dl
+; X64-NEXT:    cmovneq %rax, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    testb %dl, %dl
+; X64-NEXT:    movq %r14, %r10
+; X64-NEXT:    cmovneq %rax, %r14
+; X64-NEXT:    cmoveq %r15, %rax
+; X64-NEXT:    jne .LBB6_12
+; X64-NEXT:  # %bb.7: # %udiv-end1
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    movq %r13, %rsi
+; X64-NEXT:    xorq $128, %rsi
+; X64-NEXT:    orq %r9, %rsi
+; X64-NEXT:    orq %r12, %rsi
+; X64-NEXT:    je .LBB6_12
+; X64-NEXT:  # %bb.8: # %udiv-bb1
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movl %r13d, %edx
+; X64-NEXT:    movb $-128, %sil
+; X64-NEXT:    subb %r13b, %sil
+; X64-NEXT:    movq %r8, %rbx
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    shlq %cl, %rbx
+; X64-NEXT:    movq %r15, %rdi
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    xorl %r11d, %r11d
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    cmovneq %r11, %rax
+; X64-NEXT:    cmovneq %r11, %rbx
+; X64-NEXT:    movq %r10, %r14
+; X64-NEXT:    shldq %cl, %r15, %r14
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    cmovneq %rdi, %r14
+; X64-NEXT:    addq $1, %r13
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    movq %r15, %rdi
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    shrdq %cl, %r10, %rdi
+; X64-NEXT:    movq %r10, %rbp
+; X64-NEXT:    shrq %cl, %rbp
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    cmoveq %rdi, %rbp
+; X64-NEXT:    orl %ebp, %ebx
+; X64-NEXT:    negb %dl
+; X64-NEXT:    movq %r15, %rdi
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    testb $64, %dl
+; X64-NEXT:    cmovneq %r11, %rdi
+; X64-NEXT:    testb %sil, %sil
+; X64-NEXT:    cmovsq %r11, %r14
+; X64-NEXT:    cmovsq %rdi, %rbx
+; X64-NEXT:    cmoveq %r8, %rbx
+; X64-NEXT:    cmovsq %r11, %rax
+; X64-NEXT:    movq %r13, %rcx
+; X64-NEXT:    orq %r9, %rcx
+; X64-NEXT:    orq %r12, %rcx
+; X64-NEXT:    je .LBB6_14
+; X64-NEXT:  # %bb.9: # %udiv-preheader
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %r15, %rdx
+; X64-NEXT:    movl %r13d, %ecx
+; X64-NEXT:    shrdq %cl, %r10, %rdx
+; X64-NEXT:    movq %r10, %rsi
+; X64-NEXT:    shrq %cl, %rsi
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    testb $64, %r13b
+; X64-NEXT:    cmovneq %rsi, %rdx
+; X64-NEXT:    cmovneq %rbp, %rsi
+; X64-NEXT:    movb $-128, %cl
+; X64-NEXT:    subb %r13b, %cl
+; X64-NEXT:    movq %r10, %r11
+; X64-NEXT:    xorl %r10d, %r10d
+; X64-NEXT:    shldq %cl, %r8, %r10
+; X64-NEXT:    movq %r8, %rdi
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rdi, %r10
+; X64-NEXT:    cmovneq %rbp, %rdi
+; X64-NEXT:    orq %rdx, %rdi
+; X64-NEXT:    orq %rsi, %r10
+; X64-NEXT:    leal -128(%r13), %ecx
+; X64-NEXT:    shrdq %cl, %rbp, %r8
+; X64-NEXT:    testb $64, %cl
+; X64-NEXT:    cmovneq %rbp, %r8
+; X64-NEXT:    testb %r13b, %r13b
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    cmovsq %rbp, %r10
+; X64-NEXT:    cmoveq %r11, %r10
+; X64-NEXT:    cmovnsq %rdi, %r8
+; X64-NEXT:    cmoveq %r15, %r8
+; X64-NEXT:    movl $16, %r15d
+; X64-NEXT:    xorl %r11d, %r11d
+; X64-NEXT:    xorl %esi, %esi
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    movq %r10, %rdx
+; X64-NEXT:    movq %r14, %rbp
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB6_10: # %udiv-do-while
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    shldq $1, %r8, %rdx
+; X64-NEXT:    shrq $63, %r10
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    leaq (%rbx,%r8,2), %r8
+; X64-NEXT:    shldq $1, %rax, %rbp
+; X64-NEXT:    orq %rsi, %rbp
+; X64-NEXT:    shrq $63, %r14
+; X64-NEXT:    addq %rax, %rax
+; X64-NEXT:    orq %r11, %rax
+; X64-NEXT:    orl %r14d, %edi
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    cmpq %r8, %r15
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    sbbq %rdx, %rsi
+; X64-NEXT:    movl $0, %edi
+; X64-NEXT:    sbbq %r10, %rdi
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    negq %rdi
+; X64-NEXT:    movl %edi, %r11d
+; X64-NEXT:    andl $1, %r11d
+; X64-NEXT:    andl $17, %edi
+; X64-NEXT:    subq %rdi, %r8
+; X64-NEXT:    sbbq $0, %rdx
+; X64-NEXT:    addq $-1, %r13
+; X64-NEXT:    adcq $-1, %r12
+; X64-NEXT:    adcq $1, %r9
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    movq %r13, %rdi
+; X64-NEXT:    orq %r9, %rdi
+; X64-NEXT:    orq %r12, %rdi
+; X64-NEXT:    movl $0, %esi
+; X64-NEXT:    movl $0, %edi
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rbp, %r14
+; X64-NEXT:    jne .LBB6_10
+; X64-NEXT:    jmp .LBB6_11
+; X64-NEXT:  .LBB6_14:
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    movq %r14, %rbp
+; X64-NEXT:  .LBB6_11: # %udiv-loop-exit
+; X64-NEXT:    movq %rbp, %r14
+; X64-NEXT:    shldq $1, %rax, %r14
+; X64-NEXT:    orq %rcx, %r14
+; X64-NEXT:    shrq $63, %rbp
+; X64-NEXT:    addq %rax, %rax
+; X64-NEXT:    orq %r11, %rax
+; X64-NEXT:    orl %ebp, %ecx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:  .LBB6_12: # %udiv-end
+; X64-NEXT:    xorq %r11, %rcx
+; X64-NEXT:    xorq %rbp, %r14
+; X64-NEXT:    xorq %rbp, %rax
+; X64-NEXT:    subq %rbp, %rax
+; X64-NEXT:    sbbq %rbp, %r14
+; X64-NEXT:    sbbq %r11, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %res = udiv i129 %a, %b
+  %res2 = sdiv i129 %res, 17
+  ret i129 %res2
+}