diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6042,6 +6042,233 @@
   return SDValue();
 }
 
+static SDValue foldUnsignedBoundsCheck(SDNode *LogicOp, SelectionDAG &DAG) {
+  SDValue LHS = LogicOp->getOperand(0);
+  SDValue RHS = LogicOp->getOperand(1);
+  if (!LHS.getOperand(0).getValueType().isInteger())
+    return SDValue();
+  if (!RHS.getOperand(0).getValueType().isInteger())
+    return SDValue();
+
+  // We are looking for something along the lines of:
+  //    Val u>/u>= LowerBound && Val u</u<= UpperBound
+  //
+  // With that pattern we can often reduce it to:
+  //    Val - LowerBound u</u<= UpperBound - LowerBound
+  //
+  // The reasoning being: if Val is below LowerBound, then Val - LowerBound
+  // wraps and is trivially greater than UpperBound - LowerBound; likewise if
+  // Val is greater than UpperBound, then Val - LowBound will just trivially be
+  // greater than UpperBound - LowerBound.
+
+  // Return if we know either Hi u> Lo or, if OrEq is set, then Hi u>= Lo.
+  // First checks a few basic patterns, then falls back to knownbits.
+  auto IsBoundPair = [&DAG](SDValue Lo, SDValue Hi, bool OrEq) {
+    switch (Hi.getOpcode()) {
+    case ISD::ADD:
+      // x nuw+ y u>= x,y
+      [[fallthrough]];
+    case ISD::MUL:
+      // x nuw* y u>= x if y != 0
+    case ISD::SHL:
+      // x nuw<< y u>= x
+      if (!Hi->getFlags().hasNoUnsignedWrap())
+        break;
+      [[fallthrough]];
+    case ISD::OR:
+      // x | y u>= x,y
+      for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+        if (Hi.getOperand(OpIdx) == Lo) {
+          if (OrEq)
+            return Hi.getOpcode() != ISD::MUL ||
+                   DAG.isKnownNeverZero(Hi.getOperand(1 - OpIdx));
+          // x nuw+ y u> x if y != 0
+          if (Hi.getOpcode() == ISD::ADD)
+            return DAG.isKnownNeverZero(Hi.getOperand(1 - OpIdx));
+          // x nuw<< y u> x if x != 0 && y != 0
+          if (Hi.getOpcode() == ISD::SHL)
+            return DAG.isKnownNeverZero(Hi.getOperand(OpIdx)) &&
+                   DAG.isKnownNeverZero(Hi.getOperand(1 - OpIdx));
+          // x nuw* y u> x if x != 0 && y u> 1
+          if (Hi.getOpcode() == ISD::MUL)
+            return DAG.isKnownNeverZero(Hi.getOperand(OpIdx)) &&
+                   DAG.computeKnownBits(Hi.getOperand(1 - OpIdx))
+                       .getMinValue()
+                       .ugt(1);
+          // x | y u> x if (~x & y) != 0
+          KnownBits Known0 = DAG.computeKnownBits(Hi.getOperand(1 - OpIdx));
+          if (Known0.isUnknown())
+            return false;
+          KnownBits Known1 = DAG.computeKnownBits(Hi.getOperand(OpIdx));
+
+          // ~ for KnownBits
+          std::swap(Known0.One, Known0.Zero);
+          return (Known0 & Known1).isNonZero();
+        }
+        if (Hi.getOpcode() == ISD::SHL)
+          break;
+      }
+      break;
+    default:
+      break;
+    }
+
+    switch (Lo.getOpcode()) {
+    case ISD::SUB:
+      // x nuw- y u<= x
+      if (!Lo->getFlags().hasNoUnsignedWrap())
+        break;
+      [[fallthrough]];
+    case ISD::SRL:
+      // x >> y u<= x
+    case ISD::UDIV:
+      // x / y u<= x
+      if (Lo.getOperand(0) == Hi) {
+        if (OrEq)
+          return true;
+
+        if (!DAG.isKnownNeverZero(Lo.getOperand(1)))
+          return false;
+
+        // x nuw- y u< x if y != 0
+        if(Lo.getOpcode() == ISD::SUB)
+          return true;
+
+        // x nuw>> y u< x if x != 0 && y != 0
+        if (Lo.getOpcode() == ISD::SRL)
+          return DAG.isKnownNeverZero(Lo.getOperand(0));
+
+        // x / y u< x if x != 0 && y u> 1
+        return DAG.computeKnownBits(Lo.getOperand(0)).getMaxValue().ugt(1);
+      }
+      break;
+    default:
+      break;
+    }
+
+    // No obvious opcode for defining the relationship, so use knownbits.
+    KnownBits KnownHi = DAG.computeKnownBits(Hi);
+    if (KnownHi.isUnknown())
+      return false;
+    KnownBits KnownLo = DAG.computeKnownBits(Lo);
+    std::optional<bool> OkayOrder = OrEq ? KnownBits::uge(KnownHi, KnownLo)
+                                         : KnownBits::ugt(KnownHi, KnownLo);
+    return OkayOrder.has_value() && *OkayOrder;
+  };
+
+  bool IsAnd = LogicOp->getOpcode() == ISD::AND;
+  SDValue BoundLo, BoundHi, Val;
+  ISD::CondCode CCLo, CCHi;
+  EVT VT = LogicOp->getValueType(0);
+
+  // Set: BoundLo, BoundHi, Val, CCLo, and CCHi using arguments OpLo and OpHi
+  auto TryFindBoundsPattern = [&](SDValue OpLo, SDValue OpHi) {
+    CCLo = cast<CondCodeSDNode>(OpLo.getOperand(2))->get();
+    // In the Or case, we are looking for exclusion from bounds, so invert the
+    // condition.
+    if (!IsAnd)
+      CCLo = ISD::getSetCCInverse(CCLo, VT);
+
+    // Get Val and BoundLo based on what CCLo is.
+    switch (CCLo) {
+    case ISD::CondCode::SETUGT:
+    case ISD::CondCode::SETUGE:
+      Val = OpLo->getOperand(0);
+      BoundLo = OpLo->getOperand(1);
+      break;
+    case ISD::CondCode::SETULT:
+    case ISD::CondCode::SETULE:
+      Val = OpLo->getOperand(1);
+      BoundLo = OpLo->getOperand(0);
+      break;
+    default:
+      return false;
+    };
+
+    CCHi = cast<CondCodeSDNode>(OpHi.getOperand(2))->get();
+    // In the Or case, we are looking for exclusion from bounds, so invert the
+    // condition.
+    if (!IsAnd)
+      CCHi = ISD::getSetCCInverse(CCHi, VT);
+
+    // Get BoundHi and verify we match Val based on CCHi.
+    switch (CCHi) {
+    case ISD::CondCode::SETUGT:
+    case ISD::CondCode::SETUGE:
+      if (Val != OpHi->getOperand(1))
+        return false;
+      BoundHi = OpHi->getOperand(0);
+      break;
+    case ISD::CondCode::SETULT:
+    case ISD::CondCode::SETULE:
+      if (Val != OpHi->getOperand(0))
+        return false;
+      BoundHi = OpHi->getOperand(1);
+      break;
+    default:
+      return false;
+    };
+
+    // Transforms aren't always valid with undef bounds (they are okay still if
+    // BoundHi > BoundLo, but not if BoundHi == BoundLo). Undefs are uncommon
+    // enough we can just exclude them.
+    if (BoundLo.isUndef() || BoundHi.isUndef() || Val.isUndef())
+      return false;
+
+    // Return true if BoundHi/BoundLo form bounds around Val.
+    return IsBoundPair(BoundLo, BoundHi, ISD::isTrueWhenEqual(CCLo));
+  };
+
+  if (!TryFindBoundsPattern(LHS, RHS) && !TryFindBoundsPattern(RHS, LHS))
+    return SDValue();
+
+  EVT OpVT = Val.getValueType();
+  SDLoc DL(LogicOp);
+
+  // We might need to adjust these values if CCLo is not inclusive.
+  SDValue SubValBy = BoundLo;
+  SDValue SubHiBy = BoundLo;
+
+  ISD::CondCode NewCC = ISD::isTrueWhenEqual(CCHi) ? ISD::CondCode::SETULE
+                                                   : ISD::CondCode::SETULT;
+
+  // Nearly all of the intermediate nodes can have the nuw flag.
+  SDNodeFlags NUWFlag;
+  NUWFlag.setNoUnsignedWrap(true);
+
+  // If the low bound comparison doesn't include zero, we need to subtract by
+  // one more to overflow on Val == BoundLo.
+  if(!ISD::isTrueWhenEqual(CCLo)) {
+    // We are only here if BoundHi > BoundLo which is impossible if BoundLo ==
+    // UINT_MAX so +1 will never overflow.
+    SubValBy = DAG.getNode(ISD::ADD, DL, OpVT, SubValBy,
+                           DAG.getConstant(1, DL, OpVT), NUWFlag);
+
+    // If condition on upperbound is inclusive, we can fix the off by one by
+    // just switching to exclusive bound. Otherwise we need to decrease size by
+    // one. Do so by reusing our lo + 1: hi - (lo + 1) -> sz - 1.
+    if (NewCC == ISD::CondCode::SETULE)
+      NewCC = ISD::CondCode::SETULT;
+    else
+      SubHiBy = SubValBy;
+  }
+
+  // BoundHi must be u>= BoundLo so the subtract never has unsigned overflow.
+  SDValue BoundSz = DAG.getNode(ISD::SUB, DL, OpVT, BoundHi, SubHiBy, NUWFlag);
+  SDValue SubbedVal = DAG.getNode(ISD::SUB, DL, OpVT, Val, SubValBy);
+
+  // In the Or case we inverted the conditions (so we could share match code
+  // with And). Need to un-invert before creating the final op.
+  if (!IsAnd)
+    NewCC = ISD::getSetCCInverse(NewCC, VT);
+
+  return DAG.getSetCC(DL, VT, SubbedVal, BoundSz, NewCC);
+}
+
 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
   using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
   assert(
@@ -6055,6 +6282,9 @@
       !LHS->hasOneUse() || !RHS->hasOneUse())
     return SDValue();
 
+  if (SDValue R = foldUnsignedBoundsCheck(LogicOp, DAG))
+    return R;
+
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
       LogicOp, LHS.getNode(), RHS.getNode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4052,6 +4052,11 @@
         // it.
         IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
 
+        // Inbounds GEP scaling is guaranteed to be NSW.
+        SDNodeFlags ScaleFlags;
+        if (cast<GEPOperator>(I).isInBounds())
+          ScaleFlags.setNoSignedWrap(true);
+
         if (ElementScalable) {
           EVT VScaleTy = N.getValueType().getScalarType();
           SDValue VScale = DAG.getNode(
@@ -4059,27 +4064,33 @@
               DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
           if (IsVectorGEP)
             VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
-          IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+          IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale, ScaleFlags);
         } else {
           // If this is a multiply by a power of two, turn it into a shl
           // immediately.  This is a very common case.
           if (ElementMul != 1) {
             if (ElementMul.isPowerOf2()) {
               unsigned Amt = ElementMul.logBase2();
-              IdxN = DAG.getNode(ISD::SHL, dl,
-                                 N.getValueType(), IdxN,
-                                 DAG.getConstant(Amt, dl, IdxN.getValueType()));
+              IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
+                                 DAG.getConstant(Amt, dl, IdxN.getValueType()),
+                                 ScaleFlags);
             } else {
               SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
                                               IdxN.getValueType());
-              IdxN = DAG.getNode(ISD::MUL, dl,
-                                 N.getValueType(), IdxN, Scale);
+              IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale,
+                                 ScaleFlags);
             }
           }
         }
-
-        N = DAG.getNode(ISD::ADD, dl,
-                        N.getValueType(), N, IdxN);
+        // Inbounds GEP cannot wrap around the address space if idx is
+        // non-negative. Note we don't need to actually check idxN (after
+        // scaling) as inbounds implies nsw for scaling.
+        SDNodeFlags AccumFlags;
+        if (cast<GEPOperator>(I).isInBounds())
+          AccumFlags.setNoUnsignedWrap(
+              llvm::isKnownNonNegative(Idx, DAG.getDataLayout()));
+
+        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AccumFlags);
       }
     }
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -278,30 +278,28 @@
 define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(ptr addrspace(1) %arg) {
 ; GFX1032-LABEL: test_vop3_cmp_u32_sop_or:
 ; GFX1032:       ; %bb.0:
-; GFX1032-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX1032-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    global_load_dword v1, v0, s[2:3]
+; GFX1032-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v1
-; GFX1032-NEXT:    v_cmp_gt_u32_e64 s0, 2, v1
-; GFX1032-NEXT:    s_or_b32 s0, vcc_lo, s0
-; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s0
-; GFX1032-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX1032-NEXT:    v_add_nc_u32_e32 v1, -2, v1
+; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 1, v1
+; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, vcc_lo
+; GFX1032-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1032-NEXT:    s_endpgm
 ;
 ; GFX1064-LABEL: test_vop3_cmp_u32_sop_or:
 ; GFX1064:       ; %bb.0:
-; GFX1064-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX1064-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    global_load_dword v1, v0, s[2:3]
+; GFX1064-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, 3, v1
-; GFX1064-NEXT:    v_cmp_gt_u32_e64 s[0:1], 2, v1
-; GFX1064-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s[0:1]
-; GFX1064-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX1064-NEXT:    v_add_nc_u32_e32 v1, -2, v1
+;
GFX1064-NEXT: v_cmp_lt_u32_e32 vcc, 1, v1 +; GFX1064-NEXT: v_cndmask_b32_e64 v1, 2, 1, vcc +; GFX1064-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1064-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %lid diff --git a/llvm/test/CodeGen/X86/icmp-in-unsigned-bounds.ll b/llvm/test/CodeGen/X86/icmp-in-unsigned-bounds.ll --- a/llvm/test/CodeGen/X86/icmp-in-unsigned-bounds.ll +++ b/llvm/test/CodeGen/X86/icmp-in-unsigned-bounds.ll @@ -4,12 +4,9 @@ define i1 @and_bounds_uge_lb_ult_ub_add(i32 %val, i32 %lb, i32 %sz) nounwind { ; CHECK-LABEL: and_bounds_uge_lb_ult_ub_add: ; CHECK: # %bb.0: -; CHECK-NEXT: addl %esi, %edx -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: setae %cl +; CHECK-NEXT: subl %esi, %edi ; CHECK-NEXT: cmpl %edx, %edi ; CHECK-NEXT: setb %al -; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: retq %ub = add nuw i32 %lb, %sz %r0 = icmp uge i32 %val, %lb @@ -42,11 +39,10 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, %eax -; CHECK-NEXT: cmpl %edi, %eax -; CHECK-NEXT: seta %cl +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: subl %eax, %edi ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: setae %al -; CHECK-NEXT: orb %cl, %al ; CHECK-NEXT: retq %lb = lshr i32 %ub, %sz %r0 = icmp ugt i32 %lb, %val @@ -78,14 +74,15 @@ define i1 @and_bounds_uge_lb_ult_ub_sub(i32 %val, i32 %ub, i32 %sz_in) nounwind { ; CHECK-LABEL: and_bounds_uge_lb_ult_ub_sub: ; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi ; CHECK-NEXT: orl $1, %edx -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: subl %edx, %eax -; CHECK-NEXT: cmpl %eax, %edi -; CHECK-NEXT: seta %cl +; CHECK-NEXT: negl %edx +; CHECK-NEXT: leal 1(%rsi,%rdx), %eax +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: subl %eax, %edi ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: setb %al -; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: retq %sz = or i32 
%sz_in, 1 %lb = sub nuw i32 %ub, %sz @@ -123,11 +120,11 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx -; CHECK-NEXT: cmpl %edi, %eax -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setb %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: notl %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: cmpl %esi, %eax +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %ub = or i32 %ub_in, 1 %sz = or i32 %sz_in, 2 @@ -147,11 +144,11 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx -; CHECK-NEXT: cmpl %edi, %eax -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setb %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: notl %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: cmpl %esi, %eax +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %ub = or i32 %ub_in, 1 %sz = or i32 %sz_in, 1 @@ -170,11 +167,11 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx -; CHECK-NEXT: cmpl %edi, %eax -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setb %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: subl %eax, %esi +; CHECK-NEXT: notl %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: cmpl %esi, %eax +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %ub = or i32 %ub_in, 0 %sz = or i32 %sz_in, 2 @@ -189,11 +186,10 @@ ; CHECK-LABEL: and_bounds_uge_lb_uge_ub_or: ; CHECK: # %bb.0: ; CHECK-NEXT: orl %esi, %edx -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpl %edi, %edx -; CHECK-NEXT: setae %al -; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: subl %esi, %edx +; CHECK-NEXT: cmpl %edx, %edi +; CHECK-NEXT: setbe %al ; CHECK-NEXT: retq %ub = or i32 %lb, %sz %r0 = icmp uge i32 %val, %lb @@ -225,11 +221,11 @@ ; CHECK-NEXT: orl $1, %esi ; CHECK-NEXT: orl $2, %edx ; CHECK-NEXT: imull %esi, %edx -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setae 
%cl -; CHECK-NEXT: cmpl %edi, %edx -; CHECK-NEXT: setb %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: subl %esi, %edx +; CHECK-NEXT: notl %esi +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %lb = or i32 %lb_in, 1 %sz = or i32 %sz_in, 2 @@ -290,11 +286,11 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: setb %cl +; CHECK-NEXT: incl %esi +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: subl %esi, %eax ; CHECK-NEXT: cmpl %eax, %edi ; CHECK-NEXT: setb %al -; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: retq %lb = or i32 %lb_in, 1 %sz = or i32 %sz_in, 1 @@ -332,12 +328,10 @@ ; CHECK-LABEL: or_bounds_gep: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: leaq (%rsi,%rax,8), %rax -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: seta %cl +; CHECK-NEXT: shlq $3, %rax +; CHECK-NEXT: subq %rsi, %rdi ; CHECK-NEXT: cmpq %rax, %rdi ; CHECK-NEXT: setae %al -; CHECK-NEXT: orb %cl, %al ; CHECK-NEXT: retq %sz = zext i32 %sz_in to i64 %ub = getelementptr inbounds i64, ptr %lb, i64 %sz @@ -353,11 +347,11 @@ ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: orq $1, %rax ; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: setb %cl +; CHECK-NEXT: incq %rsi +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: subq %rsi, %rax ; CHECK-NEXT: cmpq %rax, %rdi ; CHECK-NEXT: setb %al -; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: retq %sz_in64 = zext i32 %sz_in to i64 %sz = or i64 %sz_in64, 1 @@ -374,12 +368,10 @@ ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shlq $4, %rax ; CHECK-NEXT: orq $16, %rax -; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: setae %cl -; CHECK-NEXT: cmpq %rax, %rdi -; CHECK-NEXT: seta %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: notq %rsi +; CHECK-NEXT: addq %rdi, %rsi +; CHECK-NEXT: cmpq %rax, %rsi +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %sz_in64 = zext i32 
%sz_in to i64 %sz = or i64 %sz_in64, 1 @@ -414,12 +406,10 @@ ; CHECK-LABEL: or_bounds_gep4: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: leaq (%rsi,%rax,4), %rax -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: seta %cl +; CHECK-NEXT: shlq $2, %rax +; CHECK-NEXT: subq %rsi, %rdi ; CHECK-NEXT: cmpq %rax, %rdi ; CHECK-NEXT: seta %al -; CHECK-NEXT: orb %cl, %al ; CHECK-NEXT: retq %sz = zext i32 %sz_in to i64 %ub = getelementptr inbounds i32, ptr %lb, i64 %sz