diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6042,6 +6042,233 @@
   return SDValue();
 }
 
+static SDValue foldUnsignedBoundsCheck(SDNode *LogicOp, SelectionDAG &DAG) {
+  SDValue LHS = LogicOp->getOperand(0);
+  SDValue RHS = LogicOp->getOperand(1);
+  if (!LHS.getOperand(0).getValueType().isInteger())
+    return SDValue();
+  if (!RHS.getOperand(0).getValueType().isInteger())
+    return SDValue();
+
+  // We are looking for something along the lines of:
+  //    Val u>/u>= LowerBound && Val u</u<= UpperBound
+  //
+  // With that pattern we can often reduce it to:
+  //    Val - LowerBound u</u<= UpperBound - LowerBound
+  //
+  // The reasoning being: if Val is below LowerBound, then Val - LowerBound
+  // wraps and is trivially greater than UpperBound - LowerBound; likewise if
+  // Val is greater than UpperBound, then Val - LowBound will just trivially be
+  // greater than UpperBound - LowerBound.
+
+  // Return if we know either Hi u> Lo or, if OrEq is set, then Hi u>= Lo.
+  // First checks a few basic patterns, then falls back to knownbits.
+  auto IsBoundPair = [&DAG](SDValue Lo, SDValue Hi, bool OrEq) {
+    switch (Hi.getOpcode()) {
+    case ISD::ADD:
+      // x nuw+ y u>= x,y
+      [[fallthrough]];
+    case ISD::MUL:
+      // x nuw* y u>= x if y != 0
+    case ISD::SHL:
+      // x nuw<< y u>= x
+      if (!Hi->getFlags().hasNoUnsignedWrap())
+        break;
+      [[fallthrough]];
+    case ISD::OR:
+      // x | y u>= x,y
+      for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+        if (Hi.getOperand(OpIdx) == Lo) {
+          if (OrEq)
+            return Hi.getOpcode() != ISD::MUL ||
+                   DAG.isKnownNeverZero(Hi.getOperand(1 - OpIdx));
+          // x nuw+ y u> x if y != 0
+          if (Hi.getOpcode() == ISD::ADD)
+            return DAG.isKnownNeverZero(Hi.getOperand(1 - OpIdx));
+          // x nuw<< y u> x if x != 0 && y != 0
+          if (Hi.getOpcode() == ISD::SHL)
+            return DAG.isKnownNeverZero(Hi.getOperand(OpIdx)) &&
+                   DAG.isKnownNeverZero(Hi.getOperand(1 - OpIdx));
+          // x nuw* y u> x if x != 0 && y u> 1
+          if (Hi.getOpcode() == ISD::MUL)
+            return DAG.isKnownNeverZero(Hi.getOperand(OpIdx)) &&
+                   DAG.computeKnownBits(Hi.getOperand(1 - OpIdx))
+                       .getMinValue()
+                       .ugt(1);
+          // x | y u> x if (~x & y) != 0
+          KnownBits Known0 = DAG.computeKnownBits(Hi.getOperand(1 - OpIdx));
+          if (Known0.isUnknown())
+            return false;
+          KnownBits Known1 = DAG.computeKnownBits(Hi.getOperand(OpIdx));
+
+          // ~ for KnownBits
+          std::swap(Known0.One, Known0.Zero);
+          return (Known0 & Known1).isNonZero();
+        }
+        if (Hi.getOpcode() == ISD::SHL)
+          break;
+      }
+      break;
+    default:
+      break;
+    }
+
+    switch (Lo.getOpcode()) {
+    case ISD::SUB:
+      // x nuw- y u<= x
+      if (!Lo->getFlags().hasNoUnsignedWrap())
+        break;
+      [[fallthrough]];
+    case ISD::SRL:
+      // x >> y u<= x
+    case ISD::UDIV:
+      // x / y u<= x
+      if (Lo.getOperand(0) == Hi) {
+        if (OrEq)
+          return true;
+
+        if (!DAG.isKnownNeverZero(Lo.getOperand(1)))
+          return false;
+
+        // x nuw- y u< x if y != 0
+        if(Lo.getOpcode() == ISD::SUB)
+          return true;
+
+        // x nuw>> y u< x if x != 0 && y != 0
+        if (Lo.getOpcode() == ISD::SRL)
+          return DAG.isKnownNeverZero(Lo.getOperand(0));
+
+        // x / y u< x if x != 0 && y u> 1
+        return DAG.computeKnownBits(Lo.getOperand(0)).getMaxValue().ugt(1);
+      }
+      break;
+    default:
+      break;
+    }
+
+    // No obvious opcode for defining the relationship, so use knownbits.
+    KnownBits KnownHi = DAG.computeKnownBits(Hi);
+    if (KnownHi.isUnknown())
+      return false;
+    KnownBits KnownLo = DAG.computeKnownBits(Lo);
+    std::optional<bool> OkayOrder = OrEq ? KnownBits::uge(KnownHi, KnownLo)
+                                         : KnownBits::ugt(KnownHi, KnownLo);
+    return OkayOrder.has_value() && *OkayOrder;
+  };
+
+  bool IsAnd = LogicOp->getOpcode() == ISD::AND;
+  SDValue BoundLo, BoundHi, Val;
+  ISD::CondCode CCLo, CCHi;
+  EVT VT = LogicOp->getValueType(0);
+
+  // Set: BoundLo, BoundHi, Val, CCLo, and CCHi using arguments OpLo and OpHi
+  auto TryFindBoundsPattern = [&](SDValue OpLo, SDValue OpHi) {
+    CCLo = cast<CondCodeSDNode>(OpLo.getOperand(2))->get();
+    // In the Or case, we are looking for exclusion from bounds, so invert the
+    // condition.
+    if (!IsAnd)
+      CCLo = ISD::getSetCCInverse(CCLo, VT);
+
+    // Get Val and BoundLo based on what CCLo is.
+    switch (CCLo) {
+    case ISD::CondCode::SETUGT:
+    case ISD::CondCode::SETUGE:
+      Val = OpLo->getOperand(0);
+      BoundLo = OpLo->getOperand(1);
+      break;
+    case ISD::CondCode::SETULT:
+    case ISD::CondCode::SETULE:
+      Val = OpLo->getOperand(1);
+      BoundLo = OpLo->getOperand(0);
+      break;
+    default:
+      return false;
+    };
+
+    CCHi = cast<CondCodeSDNode>(OpHi.getOperand(2))->get();
+    // In the Or case, we are looking for exclusion from bounds, so invert the
+    // condition.
+    if (!IsAnd)
+      CCHi = ISD::getSetCCInverse(CCHi, VT);
+
+    // Get BoundHi and verify we match Val based on CCHi.
+    switch (CCHi) {
+    case ISD::CondCode::SETUGT:
+    case ISD::CondCode::SETUGE:
+      if (Val != OpHi->getOperand(1))
+        return false;
+      BoundHi = OpHi->getOperand(0);
+      break;
+    case ISD::CondCode::SETULT:
+    case ISD::CondCode::SETULE:
+      if (Val != OpHi->getOperand(0))
+        return false;
+      BoundHi = OpHi->getOperand(1);
+      break;
+    default:
+      return false;
+    };
+
+    // Transforms aren't always valid with undef bounds (they are okay still if
+    // BoundHi > BoundLo, but not if BoundHi == BoundLo). Undefs are uncommon
+    // enough we can just exclude them.
+    if (BoundLo.isUndef() || BoundHi.isUndef() || Val.isUndef())
+      return false;
+
+    // Return true if BoundHi/BoundLo form bounds around Val.
+    return IsBoundPair(BoundLo, BoundHi, ISD::isTrueWhenEqual(CCLo));
+  };
+
+  if (!TryFindBoundsPattern(LHS, RHS) && !TryFindBoundsPattern(RHS, LHS))
+    return SDValue();
+
+  EVT OpVT = Val.getValueType();
+  SDLoc DL(LogicOp);
+
+  // We might need to adjust these values if CCLo is not inclusive.
+  SDValue SubValBy = BoundLo;
+  SDValue SubHiBy = BoundLo;
+
+  ISD::CondCode NewCC = ISD::isTrueWhenEqual(CCHi) ? ISD::CondCode::SETULE
+                                                   : ISD::CondCode::SETULT;
+
+  // Nearly all of the intermediate nodes can have the nuw flag.
+  SDNodeFlags NUWFlag;
+  NUWFlag.setNoUnsignedWrap(true);
+
+  // If the low bound comparison doesn't include zero, we need to subtract by
+  // one more to overflow on Val == BoundLo.
+  if(!ISD::isTrueWhenEqual(CCLo)) {
+    // We are only here if BoundHi > BoundLo which is impossible if BoundLo ==
+    // UINT_MAX so +1 will never overflow.
+    SubValBy = DAG.getNode(ISD::ADD, DL, OpVT, SubValBy,
+                           DAG.getConstant(1, DL, OpVT), NUWFlag);
+
+    // If condition on upperbound is inclusive, we can fix the off by one by
+    // just switching to exclusive bound. Otherwise we need to decrease size by
+    // one. Do so by reusing our lo + 1: hi - (lo + 1) -> sz - 1.
+    if (NewCC == ISD::CondCode::SETULE)
+      NewCC = ISD::CondCode::SETULT;
+    else
+      SubHiBy = SubValBy;
+  }
+
+  // BoundHi must be u>= BoundLo so the subtract never has unsigned overflow.
+  SDValue BoundSz = DAG.getNode(ISD::SUB, DL, OpVT, BoundHi, SubHiBy, NUWFlag);
+  SDValue SubbedVal = DAG.getNode(ISD::SUB, DL, OpVT, Val, SubValBy);
+
+  // In the Or case we inverted the conditions (so we could share match code
+  // with And). Need to un-invert before creating the final op.
+  if (!IsAnd)
+    NewCC = ISD::getSetCCInverse(NewCC, VT);
+
+  return DAG.getSetCC(DL, VT, SubbedVal, BoundSz, NewCC);
+}
+
 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
   using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
   assert(
@@ -6055,6 +6282,9 @@
       !LHS->hasOneUse() || !RHS->hasOneUse())
     return SDValue();
 
+  if (SDValue R = foldUnsignedBoundsCheck(LogicOp, DAG))
+    return R;
+
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
       LogicOp, LHS.getNode(), RHS.getNode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4052,6 +4052,11 @@
         // it.
         IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
 
+        // Inbounds GEP scaling is guaranteed to be NSW.
+        SDNodeFlags ScaleFlags;
+        if (cast<GEPOperator>(I).isInBounds())
+          ScaleFlags.setNoSignedWrap(true);
+
         if (ElementScalable) {
           EVT VScaleTy = N.getValueType().getScalarType();
           SDValue VScale = DAG.getNode(
@@ -4059,27 +4064,33 @@
               DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
           if (IsVectorGEP)
             VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
-          IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+          IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale, ScaleFlags);
         } else {
           // If this is a multiply by a power of two, turn it into a shl
           // immediately.  This is a very common case.
           if (ElementMul != 1) {
             if (ElementMul.isPowerOf2()) {
               unsigned Amt = ElementMul.logBase2();
-              IdxN = DAG.getNode(ISD::SHL, dl,
-                                 N.getValueType(), IdxN,
-                                 DAG.getConstant(Amt, dl, IdxN.getValueType()));
+              IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
+                                 DAG.getConstant(Amt, dl, IdxN.getValueType()),
+                                 ScaleFlags);
             } else {
               SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
                                               IdxN.getValueType());
-              IdxN = DAG.getNode(ISD::MUL, dl,
-                                 N.getValueType(), IdxN, Scale);
+              IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale,
+                                 ScaleFlags);
             }
           }
         }
-
-        N = DAG.getNode(ISD::ADD, dl,
-                        N.getValueType(), N, IdxN);
+        // Inbounds GEP cannot wrap around the address space if idx is
+        // non-negative. Note we don't need to actually check idxN (after
+        // scaling) as inbounds implies nsw for scaling.
+        SDNodeFlags AccumFlags;
+        if (cast<GEPOperator>(I).isInBounds())
+          AccumFlags.setNoUnsignedWrap(
+              llvm::isKnownNonNegative(Idx, DAG.getDataLayout()));
+
+        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AccumFlags);
       }
     }
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -278,30 +278,28 @@
 define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(ptr addrspace(1) %arg) {
 ; GFX1032-LABEL: test_vop3_cmp_u32_sop_or:
 ; GFX1032:       ; %bb.0:
-; GFX1032-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX1032-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    global_load_dword v1, v0, s[2:3]
+; GFX1032-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v1
-; GFX1032-NEXT:    v_cmp_gt_u32_e64 s0, 2, v1
-; GFX1032-NEXT:    s_or_b32 s0, vcc_lo, s0
-; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s0
-; GFX1032-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX1032-NEXT:    v_add_nc_u32_e32 v1, -2, v1
+; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 1, v1
+; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, vcc_lo
+; GFX1032-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1032-NEXT:    s_endpgm
 ;
 ; GFX1064-LABEL: test_vop3_cmp_u32_sop_or:
 ; GFX1064:       ; %bb.0:
-; GFX1064-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX1064-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    global_load_dword v1, v0, s[2:3]
+; GFX1064-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, 3, v1
-; GFX1064-NEXT:    v_cmp_gt_u32_e64 s[0:1], 2, v1
-; GFX1064-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s[0:1]
-; GFX1064-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX1064-NEXT:    v_add_nc_u32_e32 v1, -2, v1
+;
GFX1064-NEXT: v_cmp_lt_u32_e32 vcc, 1, v1 +; GFX1064-NEXT: v_cndmask_b32_e64 v1, 2, 1, vcc +; GFX1064-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1064-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %lid diff --git a/llvm/test/CodeGen/X86/icmp-in-unsigned-bounds.ll b/llvm/test/CodeGen/X86/icmp-in-unsigned-bounds.ll --- a/llvm/test/CodeGen/X86/icmp-in-unsigned-bounds.ll +++ b/llvm/test/CodeGen/X86/icmp-in-unsigned-bounds.ll @@ -4,12 +4,9 @@ define i1 @and_bounds_uge_lb_ult_ub_add(i32 %val, i32 %lb, i32 %sz) nounwind { ; CHECK-LABEL: and_bounds_uge_lb_ult_ub_add: ; CHECK: # %bb.0: -; CHECK-NEXT: addl %esi, %edx -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: setae %cl +; CHECK-NEXT: subl %esi, %edi ; CHECK-NEXT: cmpl %edx, %edi ; CHECK-NEXT: setb %al -; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: retq %ub = add nuw i32 %lb, %sz %r0 = icmp uge i32 %val, %lb @@ -42,11 +39,10 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, %eax -; CHECK-NEXT: cmpl %edi, %eax -; CHECK-NEXT: seta %cl +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: subl %eax, %edi ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: setae %al -; CHECK-NEXT: orb %cl, %al ; CHECK-NEXT: retq %lb = lshr i32 %ub, %sz %r0 = icmp ugt i32 %lb, %val @@ -78,14 +74,15 @@ define i1 @and_bounds_uge_lb_ult_ub_sub(i32 %val, i32 %ub, i32 %sz_in) nounwind { ; CHECK-LABEL: and_bounds_uge_lb_ult_ub_sub: ; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi ; CHECK-NEXT: orl $1, %edx -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: subl %edx, %eax -; CHECK-NEXT: cmpl %eax, %edi -; CHECK-NEXT: seta %cl +; CHECK-NEXT: negl %edx +; CHECK-NEXT: leal 1(%rsi,%rdx), %eax +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: subl %eax, %edi ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: setb %al -; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: retq %sz = or i32 
%sz_in, 1 %lb = sub nuw i32 %ub, %sz @@ -123,11 +120,11 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx -; CHECK-NEXT: cmpl %edi, %eax -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setb %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: notl %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: cmpl %esi, %eax +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %ub = or i32 %ub_in, 1 %sz = or i32 %sz_in, 2 @@ -147,11 +144,11 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx -; CHECK-NEXT: cmpl %edi, %eax -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setb %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: notl %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: cmpl %esi, %eax +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %ub = or i32 %ub_in, 1 %sz = or i32 %sz_in, 1 @@ -170,11 +167,11 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx -; CHECK-NEXT: cmpl %edi, %eax -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setb %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: notl %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: cmpl %esi, %eax +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %ub = or i32 %ub_in, 0 %sz = or i32 %sz_in, 2 @@ -189,11 +186,10 @@ ; CHECK-LABEL: and_bounds_uge_lb_uge_ub_or: ; CHECK: # %bb.0: ; CHECK-NEXT: orl %esi, %edx -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpl %edi, %edx -; CHECK-NEXT: setae %al -; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: subl %esi, %edx +; CHECK-NEXT: cmpl %edx, %edi +; CHECK-NEXT: setbe %al ; CHECK-NEXT: retq %ub = or i32 %lb, %sz %r0 = icmp uge i32 %val, %lb @@ -225,11 +221,11 @@ ; CHECK-NEXT: orl $1, %esi ; CHECK-NEXT: orl $2, %edx ; CHECK-NEXT: imull %esi, %edx -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setae 
%cl -; CHECK-NEXT: cmpl %edi, %edx -; CHECK-NEXT: setb %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: subl %esi, %edx +; CHECK-NEXT: notl %esi +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %lb = or i32 %lb_in, 1 %sz = or i32 %sz_in, 2 @@ -290,11 +286,11 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setb %cl +; CHECK-NEXT: incl %esi +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: subl %esi, %eax ; CHECK-NEXT: cmpl %eax, %edi ; CHECK-NEXT: setb %al -; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: retq %lb = or i32 %lb_in, 1 %sz = or i32 %sz_in, 1 @@ -332,12 +328,10 @@ ; CHECK-LABEL: or_bounds_gep: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: leaq (%rsi,%rax,8), %rax -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: seta %cl +; CHECK-NEXT: shlq $3, %rax +; CHECK-NEXT: subq %rsi, %rdi ; CHECK-NEXT: cmpq %rax, %rdi ; CHECK-NEXT: setae %al -; CHECK-NEXT: orb %cl, %al ; CHECK-NEXT: retq %sz = zext i32 %sz_in to i64 %ub = getelementptr inbounds i64, ptr %lb, i64 %sz @@ -353,11 +347,11 @@ ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: orq $1, %rax ; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: setb %cl +; CHECK-NEXT: incq %rsi +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: subq %rsi, %rax ; CHECK-NEXT: cmpq %rax, %rdi ; CHECK-NEXT: setb %al -; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: retq %sz_in64 = zext i32 %sz_in to i64 %sz = or i64 %sz_in64, 1 @@ -374,12 +368,10 @@ ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shlq $4, %rax ; CHECK-NEXT: orq $16, %rax -; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpq %rax, %rdi -; CHECK-NEXT: seta %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: notq %rsi +; CHECK-NEXT: addq %rdi, %rsi +; CHECK-NEXT: cmpq %rax, %rsi +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %sz_in64 = zext i32 
%sz_in to i64 %sz = or i64 %sz_in64, 1 @@ -414,12 +406,10 @@ ; CHECK-LABEL: or_bounds_gep4: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: leaq (%rsi,%rax,4), %rax -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: seta %cl +; CHECK-NEXT: shlq $2, %rax +; CHECK-NEXT: subq %rsi, %rdi ; CHECK-NEXT: cmpq %rax, %rdi ; CHECK-NEXT: seta %al -; CHECK-NEXT: orb %cl, %al ; CHECK-NEXT: retq %sz = zext i32 %sz_in to i64 %ub = getelementptr inbounds i32, ptr %lb, i64 %sz