Index: lib/Target/X86/MCTargetDesc/CMakeLists.txt
===================================================================
--- lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -3,6 +3,7 @@
   X86MCTargetDesc.cpp
   X86MCAsmInfo.cpp
   X86MCCodeEmitter.cpp
+  X86MCCodePadder.cpp
   X86MachObjectWriter.cpp
   X86ELFObjectWriter.cpp
   X86WinCOFFStreamer.cpp
Index: lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCCodePadder.h"
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -75,7 +76,7 @@
   const uint64_t MaxNopLength;
 public:
   X86AsmBackend(const Target &T, StringRef CPU)
-      : MCAsmBackend(), CPU(CPU),
+      : MCAsmBackend(new X86::X86MCCodePadder(CPU)), CPU(CPU),
         MaxNopLength((CPU == "slm") ? 7 : 15) {
     HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
               CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
Index: lib/Target/X86/MCTargetDesc/X86MCCodePadder.h
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCCodePadder.h
+++ lib/Target/X86/MCTargetDesc/X86MCCodePadder.h
@@ -0,0 +1,95 @@
+//===-- X86MCCodePadder.h - X86 Specific Code Padding Handling --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCCODEPADDER_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCCODEPADDER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCCodePadder.h"
+
+namespace llvm {
+
+class MCPaddingFragment;
+class MCAsmLayout;
+
+namespace X86 {
+
+/// The X86-specific class in charge of all code padding decisions for the
+/// X86 target.
+class X86MCCodePadder : public MCCodePadder {
+  X86MCCodePadder() = delete;
+  X86MCCodePadder(const X86MCCodePadder &) = delete;
+  void operator=(const X86MCCodePadder &) = delete;
+
+protected:
+  bool basicBlockRequiresInsertionPoint(
+      const MCCodePaddingContext &Context) override;
+
+  bool usePoliciesForBasicBlock(const MCCodePaddingContext &Context) override;
+
+public:
+  X86MCCodePadder(StringRef CPU);
+  virtual ~X86MCCodePadder() {}
+};
+
+/// A padding policy designed to avoid the case of two branches with the same
+/// target address in the same instruction window.
+///
+/// In the Intel® 64 and IA-32 Architectures Optimization Reference Manual,
+/// under clause 3.4.1, Branch Prediction Optimization, the following
+/// optimization is suggested: "Avoid putting two conditional branch
+/// instructions in a loop so that both have the same branch target address
+/// and, at the same time, belong to (i.e. have their last bytes' addresses
+/// within) the same 16-byte aligned code block."
+///
+/// This policy helps avoid that case by inserting MCPaddingFragments before
+/// hazardous instructions (i.e. jmps whose target address is computable at
+/// compile time) and by returning a positive penalty weight for 16-byte
+/// windows that contain this situation.
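+///
+/// For example (illustrative only; the offsets, encodings and label are
+/// hypothetical):
+///   0x10: jne .Ltarget    ; last byte at 0x11, inside window [0x10, 0x1f]
+///   ...
+///   0x1e: jne .Ltarget    ; last byte at 0x1f, same 16-byte aligned window
+/// Padding emitted before the second branch pushes its last byte into the
+/// next window, so the two branches no longer alias in the branch predictor.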
+class BranchesWithSameTargetAvoidancePolicy : public MCCodePaddingPolicy {
+  BranchesWithSameTargetAvoidancePolicy(
+      const BranchesWithSameTargetAvoidancePolicy &) = delete;
+  void operator=(const BranchesWithSameTargetAvoidancePolicy &) = delete;
+
+protected:
+  /// Computes the penalty weight caused by having branches with the same
+  /// target in a given instruction window.
+  /// The weight increases for each branch that shares its target with an
+  /// earlier branch in the window.
+  ///
+  /// \param Window The instruction window.
+  /// \param Offset The offset of the parent section.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns the penalty weight caused by having branches with the same
+  /// target in \p Window.
+  double computeWindowPenaltyWeight(const MCPFRange &Window, uint64_t Offset,
+                                    MCAsmLayout &Layout) const override;
+
+public:
+  BranchesWithSameTargetAvoidancePolicy();
+  virtual ~BranchesWithSameTargetAvoidancePolicy() {}
+
+  /// Determines if an instruction may cause the case of branches with the
+  /// same target in a window.
+  ///
+  /// An instruction is considered hazardous by this policy if it is a jmp
+  /// whose target address is computable at compile time, since two or more
+  /// such jmps to the same target address will cause a performance penalty.
+  ///
+  /// \param Inst Instruction to examine.
+  ///
+  /// \returns true iff \p Inst is a jmp whose target address is computable
+  /// at compile time.
+  bool instructionRequiresPaddingFragment(const MCInst &Inst) const override;
+};
+
+} // namespace X86
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCCODEPADDER_H
Index: lib/Target/X86/MCTargetDesc/X86MCCodePadder.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCCodePadder.cpp
+++ lib/Target/X86/MCTargetDesc/X86MCCodePadder.cpp
@@ -0,0 +1,182 @@
+//===-- X86MCCodePadder.cpp - X86 Specific Code Padding Handling ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86MCCodePadder.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+namespace X86 {
+
+enum PerfNopFragmentKind {
+  BranchesWithSameTargetAvoidance =
+      MCPaddingFragment::FirstTargetPerfNopFragmentKind,
+  TooManyWaysAvoidance,
+  SplitInstInBranchTargetAvoidance
+};
+
+//---------------------------------------------------------------------------
+// X86MCCodePadder
+//
+
+X86MCCodePadder::X86MCCodePadder(StringRef CPU) {
+  if (CPU != "sandybridge" && CPU != "corei7-avx" && CPU != "ivybridge" &&
+      CPU != "core-avx-i" && CPU != "haswell" && CPU != "core-avx2" &&
+      CPU != "broadwell" && CPU != "skylake")
+    return;
+
+  addPolicy(new BranchesWithSameTargetAvoidancePolicy());
+}
+
+bool X86MCCodePadder::basicBlockRequiresInsertionPoint(
+    const MCCodePaddingContext &Context) {
+  // Insertion points are places where, if padding is inserted, that padding
+  // is never executed (unreachable code).
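+  // A typical example is the start of a basic block that can only be reached
+  // by a branch, never by fallthrough: any nops emitted right before it are
+  // jumped over, so they cost code size but no execution time.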
+  return MCCodePadder::basicBlockRequiresInsertionPoint(Context) ||
+         (!Context.IsBasicBlockReachableViaFallthrough &&
+          OS->getCurrentFragment()->getKind() != MCFragment::FT_Align);
+}
+
+bool X86MCCodePadder::usePoliciesForBasicBlock(
+    const MCCodePaddingContext &Context) {
+  return MCCodePadder::usePoliciesForBasicBlock(Context) &&
+         Context.IsBasicBlockInsideInnermostLoop;
+}
+
+//---------------------------------------------------------------------------
+// Utility functions
+//
+
+static bool isFarOrIndirectUnconditionalJump(const MCInst &Inst) {
+  unsigned int opcode = Inst.getOpcode();
+  return
+      // Far jmps
+      opcode == FARJMP16i || opcode == FARJMP16m || opcode == FARJMP32i ||
+      opcode == FARJMP32m || opcode == FARJMP64 ||
+      // Memory and register jmps
+      opcode == JMP16m || opcode == JMP16r || opcode == JMP32m ||
+      opcode == JMP32r || opcode == JMP64m || opcode == JMP64r;
+}
+
+static bool isJump(const MCInst &Inst) {
+  unsigned int opcode = Inst.getOpcode();
+  return
+      // Immediate conditional jmps
+      opcode == JAE_1 || opcode == JAE_2 || opcode == JAE_4 ||
+      opcode == JA_1 || opcode == JA_2 || opcode == JA_4 ||
+      opcode == JBE_1 || opcode == JBE_2 || opcode == JBE_4 ||
+      opcode == JB_1 || opcode == JB_2 || opcode == JB_4 ||
+      opcode == JCXZ || opcode == JECXZ || opcode == JE_1 ||
+      opcode == JE_2 || opcode == JE_4 || opcode == JGE_1 ||
+      opcode == JGE_2 || opcode == JGE_4 || opcode == JG_1 ||
+      opcode == JG_2 || opcode == JG_4 || opcode == JLE_1 ||
+      opcode == JLE_2 || opcode == JLE_4 || opcode == JL_1 ||
+      opcode == JL_2 || opcode == JL_4 || opcode == JNE_1 ||
+      opcode == JNE_2 || opcode == JNE_4 || opcode == JNO_1 ||
+      opcode == JNO_2 || opcode == JNO_4 || opcode == JNP_1 ||
+      opcode == JNP_2 || opcode == JNP_4 || opcode == JNS_1 ||
+      opcode == JNS_2 || opcode == JNS_4 || opcode == JO_1 ||
+      opcode == JO_2 || opcode == JO_4 || opcode == JP_1 ||
+      opcode == JP_2 || opcode == JP_4 || opcode == JRCXZ ||
+      opcode == JS_1 || opcode == JS_2 || opcode == JS_4 ||
+      // Immediate unconditional jmps
+      opcode == JMP_1 || opcode == JMP_2 || opcode == JMP_4 ||
+      // Other unconditional jmps
+      isFarOrIndirectUnconditionalJump(Inst);
+}
+
+static const MCSymbol *getBranchLabel(const MCInst &Inst) {
+  if (isFarOrIndirectUnconditionalJump(Inst))
+    return nullptr;
+
+  if (Inst.getNumOperands() != 1)
+    return nullptr;
+
+  const MCOperand &FirstOperand = Inst.getOperand(0);
+  if (!FirstOperand.isExpr())
+    return nullptr;
+
+  if (FirstOperand.getExpr()->getKind() != MCExpr::SymbolRef)
+    return nullptr;
+
+  const MCSymbolRefExpr *RefExpr =
+      static_cast<const MCSymbolRefExpr *>(FirstOperand.getExpr());
+  const MCSymbol *RefSymbol = &RefExpr->getSymbol();
+
+  if (RefSymbol->isCommon() || RefSymbol->isVariable())
+    // Not an offset symbol.
+    return nullptr;
+
+  return RefSymbol;
+}
+
+static bool computeBranchTargetAddress(const MCInst &Inst,
+                                       const MCAsmLayout &Layout,
+                                       uint64_t &TargetAddress) {
+  const MCSymbol *RefSymbol = getBranchLabel(Inst);
+  if (RefSymbol == nullptr)
+    return false;
+  return Layout.getSymbolOffset(*RefSymbol, TargetAddress);
+}
+
+//---------------------------------------------------------------------------
+// BranchesWithSameTargetAvoidancePolicy
+//
+
+BranchesWithSameTargetAvoidancePolicy::BranchesWithSameTargetAvoidancePolicy()
+    : MCCodePaddingPolicy(BranchesWithSameTargetAvoidance, UINT64_C(16),
+                          true) {}
+
+bool BranchesWithSameTargetAvoidancePolicy::instructionRequiresPaddingFragment(
+    const MCInst &Inst) const {
+  if (!isJump(Inst))
+    return false;
+  // The branch label must be computable at compile time.
+  return getBranchLabel(Inst) != nullptr;
+}
+
+double BranchesWithSameTargetAvoidancePolicy::computeWindowPenaltyWeight(
+    const MCPFRange &Window, uint64_t Offset, MCAsmLayout &Layout) const {
+
+  static const double COLLISION_WEIGHT = 1.0;
+
+  double Weight = 0.0;
+
+  SmallPtrSet<const MCSymbol *, 8> BranchTargetLabels;
+  SmallSet<uint64_t, 8> BranchTargetAddresses;
+  for (const MCPaddingFragment *Fragment : Window) {
+    const MCSymbol *TargetLabel = getBranchLabel(Fragment->getInst());
+    assert(TargetLabel != nullptr && "Label must be computable");
+
+    if (BranchTargetLabels.count(TargetLabel)) {
+      // There's already a branch pointing to that label in this window.
+      Weight += COLLISION_WEIGHT;
+      continue;
+    }
+    BranchTargetLabels.insert(TargetLabel);
+
+    uint64_t TargetAddress = UINT64_C(0);
+    if (!computeBranchTargetAddress(Fragment->getInst(), Layout,
+                                    TargetAddress))
+      continue;
+    if (BranchTargetAddresses.count(TargetAddress))
+      // There's already a branch pointing to that address in this window.
+      Weight += COLLISION_WEIGHT;
+    else
+      BranchTargetAddresses.insert(TargetAddress);
+  }
+
+  return Weight;
+}
+
+} // namespace X86
+} // namespace llvm
Index: test/CodeGen/X86/perf_nops.mir
===================================================================
--- test/CodeGen/X86/perf_nops.mir
+++ test/CodeGen/X86/perf_nops.mir
@@ -0,0 +1,314 @@
+# RUN: llc -mcpu=haswell -filetype=obj -start-before stack-protector -O2 %s -o - | llvm-objdump -d - | FileCheck %s
+
+# Source C code:
+# volatile int y;
+# volatile int x;
+#
+# int perfNopsInsertion(int z, int w) {
+#   int result = 0;
+#   while (x > 0 && y < 0) {
+#     switch(z) {
+#       case 0:
+#         result++; break;
+#       case 1:
+#         result--; break;
+#       case 2:
+#         result *= result; break;
+#     }
+#
+#     if(z == 8 && y == 9) {
+#       result += 7;
+#     }
+#     else if (z < 6 || y > 11) {
+#       result -= 18;
+#     }
+#     else if (z > 35 != y <= 55) {
+#       result *= 19;
+#     }
+#   }
+#   return result;
+# }
+--- |
+  ; ModuleID = 'perf_nops.ll'
+  source_filename = "perf_nops.c"
+  target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+
+  @x = common global i32 0, align 4
+  @y = common global i32 0, align 4
+
+  ; Function Attrs: norecurse nounwind
+  define i32 @perfNopsInsertion(i32 %z, i32 %w) local_unnamed_addr #0 {
+  entry:
+    %0 = load volatile i32, i32* @x, align 4, !tbaa !1
+    %cmp37 = icmp sgt i32 %0, 0
+    br i1 %cmp37, label %land.rhs.lr.ph, label %while.end
+
+  land.rhs.lr.ph:                                   ; preds = %entry
+    br label %land.rhs
+
+  land.rhs:                                         ; preds = %while.cond.backedge, %land.rhs.lr.ph
+    %result.038 = phi i32 [ 0, %land.rhs.lr.ph ], [ %result.0.be, %while.cond.backedge ]
+    %1 = load volatile i32, i32* @y, align 4, !tbaa !1
+    %cmp1 = icmp slt i32 %1, 0
+    br i1 %cmp1, label %while.body, label %while.end
+
+  while.body:                                       ; preds = %land.rhs
+    switch i32 %z, label %if.else [
+      i32 0, label %sw.bb
+      i32 1, label %sw.bb2
+      i32 2, label %sw.bb3
+      i32 8, label %land.lhs.true
+    ]
+
+  sw.bb:                                            ; preds = %while.body
+    %inc = add nsw i32 %result.038, 1
+    br label %if.then8
+
+  sw.bb2:                                           ; preds = %while.body
+    %dec = add nsw i32 %result.038, -1
+    br label %if.then8
+
+  sw.bb3:                                           ; preds = %while.body
+    %mul = mul nsw i32 %result.038, %result.038
+    br label %if.then8
+
+  land.lhs.true:                                    ; preds = %while.body
+    %2 = load volatile i32, i32* @y, align 4, !tbaa !1
+    %cmp5 = icmp eq i32 %2, 9
+    br i1 %cmp5, label %if.then, label %lor.lhs.false
+
+  if.then:                                          ; preds = %land.lhs.true
+    %add = add nsw i32 %result.038, 7
+    br label %while.cond.backedge
+
+  if.else:                                          ; preds = %while.body
+    %3 = icmp slt i32 %z, 6
+    br i1 %3, label %if.then8, label %lor.lhs.false
+
+  lor.lhs.false:                                    ; preds = %if.else, %land.lhs.true
+    %4 = load volatile i32, i32* @y, align 4, !tbaa !1
+    %cmp7 = icmp sgt i32 %4, 11
+    br i1 %cmp7, label %if.then8, label %if.else9
+
+  if.then8:                                         ; preds = %lor.lhs.false, %if.else, %sw.bb3, %sw.bb2, %sw.bb
+    %result.13133 = phi i32 [ %result.038, %lor.lhs.false ], [ %result.038, %if.else ], [ %inc, %sw.bb ], [ %dec, %sw.bb2 ], [ %mul, %sw.bb3 ]
+    %sub = add nsw i32 %result.13133, -18
+    br label %while.cond.backedge
+
+  while.cond.backedge:                              ; preds = %if.else9, %if.then8, %if.then
+    %result.0.be = phi i32 [ %add, %if.then ], [ %sub, %if.then8 ], [ %mul16.result.1, %if.else9 ]
+    %5 = load volatile i32, i32* @x, align 4, !tbaa !1
+    %cmp = icmp sgt i32 %5, 0
+    br i1 %cmp, label %land.rhs, label %while.end
+
+  if.else9:                                         ; preds = %lor.lhs.false
+    %6 = icmp sgt i32 %z, 35
+    %7 = load volatile i32, i32* @y, align 4, !tbaa !1
+    %cmp11 = icmp slt i32 %7, 56
+    %cmp13 = xor i1 %6, %cmp11
+    %mul16 = mul nsw i32 %result.038, 19
+    %mul16.result.1 = select i1 %cmp13, i32 %mul16, i32 %result.038
+    br label %while.cond.backedge
+
+  while.end:                                        ; preds = %land.rhs, %while.cond.backedge, %entry
+    %result.0.lcssa = phi i32 [ 0, %entry ], [ %result.0.be, %while.cond.backedge ], [ %result.038, %land.rhs ]
+    ret i32 %result.0.lcssa
+  }
+
+  attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+  !llvm.ident = !{!0}
+
+  !0 = !{!"clang version 5.0.0 (cfe/trunk)"}
+  !1 = !{!2, !2, i64 0}
+  !2 = !{!"int", !3, i64 0}
+  !3 = !{!"omnipotent char", !4, i64 0}
+  !4 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            perfNopsInsertion
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%ecx' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    8
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+jumpTable:
+  kind:            label-difference32
+  entries:
+    - id:          0
+      blocks:      [ '%bb.6.sw.bb', '%bb.7.sw.bb2', '%bb.8.sw.bb3',
+                     '%bb.11.if.else', '%bb.11.if.else', '%bb.11.if.else',
+                     '%bb.11.if.else', '%bb.11.if.else', '%bb.9.land.lhs.true' ]
+
+# The test: Two branches in the same 16B window with the same target address.
+# Expect insertion of nops in unreachable code space to avoid this.
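+# (Both jmps resolve to the same address: 0x4f + 2 + 0x3f == 0x5f + 2 + 0x2f
+# == 0x90. The nop padding moves the second jmp's last byte to 0x60, outside
+# the 16-byte window [0x50, 0x5f] that contains the first jmp's last byte.)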
+# CHECK:      4f: eb 3f                         jmp 63
+# CHECK-NEXT: 51: 66 0f 1f 84 00 00 00 00 00    nopw (%rax,%rax)
+# CHECK:      5f: eb 2f                         jmp 47
+# CHECK-NEXT: 61:
+
+body: |
+  bb.0.entry:
+    successors: %bb.2.land.rhs.lr.ph(0x50000000), %bb.1(0x30000000)
+    liveins: %ecx
+
+    CMP32mi8 %rip, 1, _, @x, _, 0, implicit-def %eflags :: (volatile dereferenceable load 4 from @x, !tbaa !1)
+    JG_1 %bb.2.land.rhs.lr.ph, implicit killed %eflags
+
+  bb.1:
+    successors: %bb.16.while.end(0x80000000)
+
+    %eax = MOV32r0 implicit-def dead %eflags
+    JMP_1 %bb.16.while.end
+
+  bb.2.land.rhs.lr.ph:
+    successors: %bb.3.land.rhs(0x80000000)
+    liveins: %ecx
+
+    %eax = MOV32r0 implicit-def dead %eflags
+    %r9d = MOV32rr %ecx, implicit-def %r9
+    %r8 = LEA64r %rip, 1, _, %jump-table.0, _
+
+  bb.3.land.rhs:
+    successors: %bb.4.while.body(0x7c000000), %bb.19(0x04000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    CMP32mi8 %rip, 1, _, @y, _, 0, implicit-def %eflags :: (volatile dereferenceable load 4 from @y, !tbaa !1)
+    JS_1 %bb.4.while.body, implicit killed %eflags
+
+  bb.19:
+    successors: %bb.16.while.end(0x80000000)
+    liveins: %eax
+
+    JMP_1 %bb.16.while.end
+
+  bb.4.while.body:
+    successors: %bb.11.if.else(0x0ccccccb), %bb.5.while.body(0x73333335)
+    liveins: %eax, %ecx, %r8, %r9
+
+    CMP32ri8 %ecx, 8, implicit-def %eflags
+    JA_1 %bb.11.if.else, implicit killed %eflags
+
+  bb.5.while.body:
+    successors: %bb.6.sw.bb(0x1c71c71c), %bb.7.sw.bb2(0x1c71c71c), %bb.8.sw.bb3(0x1c71c71c), %bb.11.if.else(0x0e38e38e), %bb.9.land.lhs.true(0x1c71c71c)
+    liveins: %eax, %ecx, %r8, %r9
+
+    %rdx = MOVSX64rm32 %r8, 4, %r9, 0, _ :: (load 4 from jump-table)
+    %rdx = ADD64rr killed %rdx, %r8, implicit-def dead %eflags
+    JMP64r killed %rdx
+
+  bb.6.sw.bb:
+    successors: %bb.13.if.then8(0x80000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    %eax = ADD32ri8 killed %eax, 1, implicit-def dead %eflags
+    JMP_1 %bb.13.if.then8
+
+  bb.7.sw.bb2:
+    successors: %bb.13.if.then8(0x80000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    %eax = ADD32ri8 killed %eax, -1, implicit-def dead %eflags
+    JMP_1 %bb.13.if.then8
+
+  bb.8.sw.bb3:
+    successors: %bb.13.if.then8(0x80000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    %eax = IMUL32rr killed %eax, %eax, implicit-def dead %eflags
+    JMP_1 %bb.13.if.then8
+
+  bb.9.land.lhs.true:
+    successors: %bb.10.if.then(0x40000000), %bb.12.lor.lhs.false(0x40000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    %edx = MOV32rm %rip, 1, _, @y, _ :: (volatile dereferenceable load 4 from @y, !tbaa !1)
+    CMP32ri8 killed %edx, 9, implicit-def %eflags
+    JNE_1 %bb.12.lor.lhs.false, implicit killed %eflags
+    JMP_1 %bb.10.if.then
+
+  bb.10.if.then:
+    successors: %bb.14.while.cond.backedge(0x80000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    %eax = ADD32ri8 killed %eax, 7, implicit-def dead %eflags
+    JMP_1 %bb.14.while.cond.backedge
+
+  bb.11.if.else:
+    successors: %bb.17(0x40000000), %bb.12.lor.lhs.false(0x40000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    CMP32ri8 %ecx, 6, implicit-def %eflags
+    JGE_1 %bb.12.lor.lhs.false, implicit killed %eflags
+
+  bb.17:
+    successors: %bb.13.if.then8(0x80000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    JMP_1 %bb.13.if.then8
+
+  bb.12.lor.lhs.false:
+    successors: %bb.18(0x40000000), %bb.15.if.else9(0x40000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    %edx = MOV32rm %rip, 1, _, @y, _ :: (volatile dereferenceable load 4 from @y, !tbaa !1)
+    CMP32ri8 killed %edx, 12, implicit-def %eflags
+    JL_1 %bb.15.if.else9, implicit killed %eflags
+
+  bb.18:
+    successors: %bb.13.if.then8(0x80000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+  bb.13.if.then8:
+    successors: %bb.14.while.cond.backedge(0x80000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    %eax = ADD32ri8 killed %eax, -18, implicit-def dead %eflags
+
+  bb.14.while.cond.backedge:
+    successors: %bb.3.land.rhs(0x7c000000), %bb.16.while.end(0x04000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    CMP32mi8 %rip, 1, _, @x, _, 0, implicit-def %eflags :: (volatile dereferenceable load 4 from @x, !tbaa !1)
+    JG_1 %bb.3.land.rhs, implicit killed %eflags
+    JMP_1 %bb.16.while.end
+
+  bb.15.if.else9:
+    successors: %bb.14.while.cond.backedge(0x80000000)
+    liveins: %eax, %ecx, %r8, %r9
+
+    CMP32ri8 %ecx, 35, implicit-def %eflags
+    %r10b = SETGr implicit killed %eflags
+    %edx = MOV32rm %rip, 1, _, @y, _ :: (volatile dereferenceable load 4 from @y, !tbaa !1)
+    CMP32ri8 killed %edx, 56, implicit-def %eflags
+    %dl = SETLr implicit killed %eflags
+    %r11d = IMUL32rri8 %eax, 19, implicit-def dead %eflags
+    dead %dl = XOR8rr killed %dl, killed %r10b, implicit-def %eflags
+    %eax = CMOVNE32rr killed %eax, killed %r11d, implicit killed %eflags
+    JMP_1 %bb.14.while.cond.backedge
+
+  bb.16.while.end:
+    liveins: %eax
+
+    RET 0, %eax
+
+...
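For readers following the penalty computation in X86MCCodePadder.cpp, the
sketch below is a minimal standalone illustration of the rule that
BranchesWithSameTargetAvoidancePolicy enforces; it is not part of the patch,
all names in it (ToyBranch, computeToyPenalty) are invented, and it simplifies
the real code by keying collisions on resolved target addresses only. Every
branch whose last byte shares a 16-byte aligned window with an earlier branch
to the same target adds one unit of penalty weight.

    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    // A toy stand-in for a resolved branch: where its last byte lands and
    // where it jumps to.
    struct ToyBranch {
      uint64_t LastByteOffset;
      uint64_t TargetAddress;
    };

    // Counts same-target collisions per 16-byte aligned window, mirroring
    // computeWindowPenaltyWeight with COLLISION_WEIGHT == 1.0.
    static double computeToyPenalty(const std::vector<ToyBranch> &Branches) {
      double Weight = 0.0;
      std::map<uint64_t, std::set<uint64_t>> TargetsPerWindow;
      for (const ToyBranch &B : Branches) {
        uint64_t Window = B.LastByteOffset / 16;
        // insert() reports whether this target was already seen in the window.
        if (!TargetsPerWindow[Window].insert(B.TargetAddress).second)
          Weight += 1.0; // another branch to the same target in this window
      }
      return Weight;
    }

    int main() {
      // Mirrors the test above: two jmps to 0x90 ending at 0x50 and 0x52
      // (same window) cost 1.0; after padding moves the second branch's last
      // byte to 0x60, the penalty drops to 0.0.
      std::vector<ToyBranch> Before = {{0x50, 0x90}, {0x52, 0x90}};
      std::vector<ToyBranch> After = {{0x50, 0x90}, {0x60, 0x90}};
      return (computeToyPenalty(Before) == 1.0 &&
              computeToyPenalty(After) == 0.0)
                 ? 0
                 : 1;
    }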