diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4802,6 +4802,17 @@
       EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
       DAGCombinerInfo &DCI, const SDLoc &DL) const;
 
+  // Simplify a SETCC that tests a shifted value for equality/inequality with
+  // zero by removing the redundant operations generated during the shift's
+  // expansion. Shift expansion (when the result is fed into a SETCC eq/ne 0)
+  // generates a tree consisting of ORs and multiple SRL/SHL nodes (which may
+  // be combined into FSHL/FSHR). When such a pair of shifts is applied to the
+  // same operand, it performs a rotation, which can be eliminated as long as
+  // the overall result is only compared with zero.
+  SDValue optimizeSetCCOfExpandedShift(EVT SCCVT, SDValue N0, SDValue N1C,
+                                       ISD::CondCode Cond, DAGCombinerInfo &DCI,
+                                       const SDLoc &DL) const;
+
   SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                             SDValue CompTargetNode, ISD::CondCode Cond,
                             DAGCombinerInfo &DCI, const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3662,6 +3662,150 @@
   return T2;
 }
 
+namespace {
+/// Helper class for optimizeSetCCOfExpandedShift.
+/// Scans an expression tree consisting of ORs and shifts to find and replace
+/// shift pairs that perform a rotation with the rotation's operand.
+class ExpandedShiftsSimplifier {
+  struct ShiftInfo {
+    APInt Bits;
+    bool IsLeft;
+  };
+  SmallDenseMap<SDValue, ShiftInfo, 2> UnmatchedShifts;
+  SmallVector<SDValue, 4> Result;
+  unsigned MatchedShiftsCount = 0;
+
+  /// Match pairs of shifts applied to the same operand that effectively
+  /// perform its rotation:
+  /// 1) create a new entry in the UnmatchedShifts map if Op is observed
+  ///    for the first time;
+  /// 2) if the UnmatchedShifts map already contains an entry for Op, check
+  ///    that it was created for a shift in the opposite direction and that
+  ///    the two shift amounts sum up to Op's size in bits.
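+  /// For example, for a 64-bit operand Op, (srl Op, 17) and (shl Op, 47)
+  /// match because 17 + 47 == 64: together the pair covers all bits of Op,
+  /// i.e. it is a rotation of Op, so for an eq/ne-0 test it can be replaced
+  /// by Op itself.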
+  bool matchShifts(SDValue Op, const APInt &C, bool IsLeft) {
+    ShiftInfo &Info = UnmatchedShifts[Op];
+    if (Info.Bits.isZero()) {
+      Info.Bits = C;
+      Info.IsLeft = IsLeft;
+      return true;
+    }
+    if (Info.IsLeft == IsLeft)
+      return false;
+    if (Info.Bits + C != Op.getValueSizeInBits())
+      return false;
+    Result.push_back(Op);
+    UnmatchedShifts.erase(Op);
+    ++MatchedShiftsCount;
+    return true;
+  }
+
+  /// Recursively scan the DAG to match all shifts while the following
+  /// conditions are met:
+  /// 1) every node has only one use;
+  /// 2) every shift is either the first shift found for its operand or
+  ///    there is a previously found matching shift in the opposite
+  ///    direction;
+  /// 3) Depth stays below SelectionDAG::MaxRecursionDepth.
+  bool scan(SDValue Value, unsigned Depth = 0) {
+    if (Depth >= SelectionDAG::MaxRecursionDepth || !Value->hasOneUse())
+      return false;
+    unsigned Opcode = Value->getOpcode();
+    bool IsShiftLeft = Opcode == ISD::SHL || Opcode == ISD::FSHL;
+    ConstantSDNode *C;
+
+    if (Opcode == ISD::OR)
+      return scan(Value->getOperand(0), Depth + 1) &&
+             scan(Value->getOperand(1), Depth + 1);
+    if (Opcode == ISD::SRL || Opcode == ISD::SHL) {
+      if (!(C = dyn_cast<ConstantSDNode>(Value->getOperand(1))))
+        return false;
+      SDValue Op = Value->getOperand(0);
+      return matchShifts(Op, C->getAPIntValue(), IsShiftLeft);
+    }
+    if (Opcode == ISD::FSHL || Opcode == ISD::FSHR) {
+      if (!(C = dyn_cast<ConstantSDNode>(Value->getOperand(2))))
+        return false;
+      SDValue Op1 = Value->getOperand(0);
+      SDValue Op2 = Value->getOperand(1);
+      const APInt &CVal = C->getAPIntValue();
+      // For funnel shifts, the second operand is effectively shifted in the
+      // opposite direction.
+      return matchShifts(Op1, CVal, IsShiftLeft) &&
+             matchShifts(Op2, Value.getValueSizeInBits() - CVal, !IsShiftLeft);
+    }
+    Result.push_back(Value);
+    return true;
+  }
+
+public:
+  SDValue simplify(SDValue Value, SelectionDAG &DAG, const SDLoc &DL) {
+    assert(Result.empty() && UnmatchedShifts.empty() &&
+           MatchedShiftsCount == 0 && "simplify may only be called once");
+
+    if (!scan(Value))
+      return SDValue();
+    // There should be at most one unmatched shift and at least one pair
+    // of matched shifts.
+    if (MatchedShiftsCount == 0 || UnmatchedShifts.size() > 1 || Result.empty())
+      return SDValue();
+    // Recreate the value for the unmatched shift.
+    if (!UnmatchedShifts.empty()) {
+      DenseMap<SDValue, ShiftInfo>::iterator UnmatchedShift =
+          UnmatchedShifts.begin();
+      SDValue Op = UnmatchedShift->first;
+      ShiftInfo &Info = UnmatchedShift->second;
+      EVT ShiftTy =
+          EVT::getIntegerVT(*DAG.getContext(), Info.Bits.getBitWidth());
+      SDValue Con = DAG.getConstant(Info.Bits, DL, ShiftTy);
+      unsigned ShiftOpcode = Info.IsLeft ? ISD::SHL : ISD::SRL;
+      Result.push_back(
+          DAG.getNode(ShiftOpcode, DL, Value.getValueType(), Op, Con));
+    }
+    // Reduce all values using OR: combine pairs of values from the Result
+    // list and push each new OR back onto it, generating a balanced tree
+    // that shortens the critical path.
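+    // For example, with Result = [A, B, C, D] the loop below appends
+    // (or A, B) and (or C, D), and finally ORs those two partial results
+    // together, giving a tree of depth 2 instead of a chain of depth 3.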
+    for (size_t Index = 0; Index + 1 < Result.size(); Index += 2) {
+      SDValue NewOr = DAG.getNode(ISD::OR, DL, Value.getValueType(),
+                                  Result[Index], Result[Index + 1]);
+      Result.push_back(NewOr);
+    }
+    return Result.back();
+  }
+};
+
+} // end anonymous namespace
+
+// Examples of redundant shift elimination (where C0 + C1 equals the bit
+// width of the shifted operand):
+// (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) ==/!= 0
+//   --> (or (srl X, C0), Y) ==/!= 0
+//
+// (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) ==/!= 0
+//   --> (or (shl X, C1), Y) ==/!= 0
+//
+// (or (srl X, C), (fshr X, Y, C)) ==/!= 0 --> (or (srl Y, C), X) ==/!= 0
+//
+// (or (or (fshl W, X, C), (fshl X, Y, C)),
+//     (or (fshl Y, Z, C), (shl Z, C))) ==/!= 0
+//   --> (or (or (shl W, C), X), (or Y, Z)) ==/!= 0
+SDValue TargetLowering::optimizeSetCCOfExpandedShift(EVT SCCVT, SDValue N0,
+                                                     SDValue N1C,
+                                                     ISD::CondCode Cond,
+                                                     DAGCombinerInfo &DCI,
+                                                     const SDLoc &DL) const {
+  assert(isNullOrNullSplat(N1C) && "Should be a comparison with 0.");
+  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+  if (N0.getValueType().isVector())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  ExpandedShiftsSimplifier Matcher;
+  if (SDValue ReducedTree = Matcher.simplify(N0, DAG, DL))
+    return DAG.getSetCC(DL, SCCVT, ReducedTree, N1C, Cond);
+
+  return SDValue();
+}
+
 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
 /// handle the commuted versions of these patterns.
@@ -4346,6 +4490,12 @@
         }
       }
     }
+    // Try to simplify an expanded shift by removing the shift operations
+    // that effectively perform a rotation.
+    if (CmpZero)
+      if (SDValue CC =
+              optimizeSetCCOfExpandedShift(VT, N0, N1, Cond, DCI, dl))
+        return CC;
   }
 
   // If we have "setcc X, C0", check to see if we can shrink the immediate
diff --git a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
--- a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
@@ -12,8 +12,7 @@
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    adds x0, x0, #1
 ; CHECK-NEXT:    adcs x1, x1, xzr
-; CHECK-NEXT:    extr x8, x1, x0, #60
-; CHECK-NEXT:    orr x8, x8, x1, lsr #60
+; CHECK-NEXT:    orr x8, x1, x0, lsr #60
 ; CHECK-NEXT:    cbnz x8, .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -32,8 +31,7 @@
 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_eq_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #17
-; CHECK-NEXT:    orr x8, x8, x1, lsr #17
+; CHECK-NEXT:    orr x8, x1, x0, lsr #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -45,8 +43,7 @@
 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_ne_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #17
-; CHECK-NEXT:    orr x8, x8, x1, lsr #17
+; CHECK-NEXT:    orr x8, x1, x0, lsr #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -58,8 +55,7 @@
 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_eq_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #47
-; CHECK-NEXT:    orr x8, x8, x0, lsl #17
+; CHECK-NEXT:    orr x8, x0, x1, lsl #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -71,8 +67,7 @@
 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #47
-; CHECK-NEXT:    orr x8, x8, x0, lsl #17
+; CHECK-NEXT:    orr x8, x0, x1, lsl #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -106,8 +101,7 @@
 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x0, x1, #47
-; CHECK-NEXT:    orr x8, x8, x1, lsl #17
+; CHECK-NEXT:    orr x8, x1, x0, lsl #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
--- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
+++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
@@ -630,14 +630,10 @@
 ; CHECKV7M-NEXT:    ldrd lr, r0, [sp, #8]
 ; CHECKV7M-NEXT:    beq .LBB6_2
 ; CHECKV7M-NEXT:  @ %bb.1: @ %then
-; CHECKV7M-NEXT:    lsrs r2, r2, #17
-; CHECKV7M-NEXT:    orr.w r2, r2, r3, lsl #15
-; CHECKV7M-NEXT:    orr.w r2, r2, r3, lsr #17
-; CHECKV7M-NEXT:    lsr.w r3, r12, #17
-; CHECKV7M-NEXT:    orr.w r3, r3, r1, lsl #15
+; CHECKV7M-NEXT:    orr.w r2, r3, r2, lsr #17
+; CHECKV7M-NEXT:    orr.w r1, r1, r12, lsr #17
 ; CHECKV7M-NEXT:    cmp r2, #0
 ; CHECKV7M-NEXT:    mov r2, r0
-; CHECKV7M-NEXT:    orr.w r1, r3, r1, lsr #17
 ; CHECKV7M-NEXT:    it ne
 ; CHECKV7M-NEXT:    movne r2, lr
 ; CHECKV7M-NEXT:    cmp r1, #0
@@ -646,9 +642,7 @@
 ; CHECKV7M-NEXT:    add r0, r2
 ; CHECKV7M-NEXT:    pop {r7, pc}
 ; CHECKV7M-NEXT:  .LBB6_2: @ %else
-; CHECKV7M-NEXT:    lsrs r1, r2, #17
-; CHECKV7M-NEXT:    orr.w r1, r1, r3, lsl #15
-; CHECKV7M-NEXT:    orr.w r1, r1, r3, lsr #17
+; CHECKV7M-NEXT:    orr.w r1, r3, r2, lsr #17
 ; CHECKV7M-NEXT:    cmp r1, #0
 ; CHECKV7M-NEXT:    it ne
 ; CHECKV7M-NEXT:    movne r0, lr
@@ -664,14 +658,10 @@
 ; CHECKV7A-NEXT:    lsls r4, r4, #31
 ; CHECKV7A-NEXT:    beq .LBB6_2
 ; CHECKV7A-NEXT:  @ %bb.1: @ %then
-; CHECKV7A-NEXT:    lsrs r2, r2, #17
-; CHECKV7A-NEXT:    orr.w r2, r2, r3, lsl #15
-; CHECKV7A-NEXT:    orr.w r2, r2, r3, lsr #17
-; CHECKV7A-NEXT:    lsr.w r3, r12, #17
-; CHECKV7A-NEXT:    orr.w r3, r3, r1, lsl #15
+; CHECKV7A-NEXT:    orr.w r2, r3, r2, lsr #17
+; CHECKV7A-NEXT:    orr.w r1, r1, r12, lsr #17
 ; CHECKV7A-NEXT:    cmp r2, #0
 ; CHECKV7A-NEXT:    mov r2, r0
-; CHECKV7A-NEXT:    orr.w r1, r3, r1, lsr #17
 ; CHECKV7A-NEXT:    it ne
 ; CHECKV7A-NEXT:    movne r2, lr
 ; CHECKV7A-NEXT:    cmp r1, #0
@@ -680,9 +670,7 @@
 ; CHECKV7A-NEXT:    add r0, r2
 ; CHECKV7A-NEXT:    pop {r4, pc}
 ; CHECKV7A-NEXT:  .LBB6_2: @ %else
-; CHECKV7A-NEXT:    lsrs r1, r2, #17
-; CHECKV7A-NEXT:    orr.w r1, r1, r3, lsl #15
-; CHECKV7A-NEXT:    orr.w r1, r1, r3, lsr #17
+; CHECKV7A-NEXT:    orr.w r1, r3, r2, lsr #17
 ; CHECKV7A-NEXT:    cmp r1, #0
 ; CHECKV7A-NEXT:    it ne
 ; CHECKV7A-NEXT:    movne r0, lr
diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
--- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
@@ -12,9 +12,7 @@
 ; CHECK-NEXT:  @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    adds r0, r0, #1
 ; CHECK-NEXT:    adc r1, r1, #0
-; CHECK-NEXT:    lsr r2, r0, #16
-; CHECK-NEXT:    orr r2, r2, r1, lsl #16
-; CHECK-NEXT:    orr r2, r2, r1, lsr #16
+; CHECK-NEXT:    orr r2, r1, r0, lsr #16
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    bne .LBB0_1
 ; CHECK-NEXT:  @ %bb.2: @ %exit
@@ -34,9 +32,7 @@
 define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_eq_zero:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsr r0, r0, #17
-; CHECK-NEXT:    orr r0, r0, r1, lsl #15
-; CHECK-NEXT:    orr r0, r0, r1, lsr #17
+; CHECK-NEXT:    orr r0, r1, r0, lsr #17
 ; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    lsr r0, r0, #5
 ; CHECK-NEXT:    bx lr
@@ -48,9 +44,7 @@
 define i1 @opt_setcc_srl_ne_zero(i64 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_ne_zero:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsr r0, r0, #17
-; CHECK-NEXT:    orr r0, r0, r1, lsl #15
-; CHECK-NEXT:    orr r0, r0, r1, lsr #17
+; CHECK-NEXT:    orr r0, r1, r0, lsr #17
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
@@ -62,9 +56,7 @@
 define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_eq_zero:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r1, r1, #17
-; CHECK-NEXT:    orr r1, r1, r0, lsr #15
-; CHECK-NEXT:    orr r0, r1, r0, lsl #17
+; CHECK-NEXT:    orr r0, r0, r1, lsl #17
 ; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    lsr r0, r0, #5
 ; CHECK-NEXT:    bx lr
@@ -76,9 +68,7 @@
 define i1 @opt_setcc_shl_ne_zero(i64 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r1, r1, #17
-; CHECK-NEXT:    orr r1, r1, r0, lsr #15
-; CHECK-NEXT:    orr r0, r1, r0, lsl #17
+; CHECK-NEXT:    orr r0, r0, r1, lsl #17
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
@@ -113,9 +103,7 @@
 define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r0, r0, #17
-; CHECK-NEXT:    orr r0, r0, r1, lsr #15
-; CHECK-NEXT:    orr r0, r0, r1, lsl #17
+; CHECK-NEXT:    orr r0, r1, r0, lsl #17
 ; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    lsr r0, r0, #5
 ; CHECK-NEXT:    bx lr
@@ -151,15 +139,9 @@
 define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero_i128:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r3, r3, #17
-; CHECK-NEXT:    orr r12, r3, r2, lsr #15
-; CHECK-NEXT:    lsl r3, r1, #17
-; CHECK-NEXT:    lsl r2, r2, #17
-; CHECK-NEXT:    orr r3, r3, r0, lsr #15
-; CHECK-NEXT:    orr r1, r2, r1, lsr #15
-; CHECK-NEXT:    orr r3, r3, r12
-; CHECK-NEXT:    orr r0, r1, r0, lsl #17
-; CHECK-NEXT:    orrs r0, r0, r3
+; CHECK-NEXT:    orr r2, r2, r3, lsl #17
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    orrs r0, r0, r2
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
   %shl = shl i128 %a, 17
diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
--- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
@@ -13,34 +13,29 @@
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    .p2align 4, 0x90
 ; X86-NEXT:  .LBB0_1: # %loop
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    addl $1, %ecx
+; X86-NEXT:    addl $1, %edi
 ; X86-NEXT:    adcl $0, %esi
 ; X86-NEXT:    adcl $0, %edx
-; X86-NEXT:    adcl $0, %ebx
-; X86-NEXT:    movl %ebx, %edi
-; X86-NEXT:    shldl $4, %edx, %edi
-; X86-NEXT:    movl %edx, %ebp
-; X86-NEXT:    shldl $4, %esi, %ebp
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    movl %ebx, %ecx
-; X86-NEXT:    shrl $28, %ecx
-; X86-NEXT:    orl %ebp, %ecx
-; X86-NEXT:    orl %edi, %ecx
-; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    orl %edx, %ebx
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    shrl $28, %ebp
+; X86-NEXT:    orl %ebx, %ebp
 ; X86-NEXT:    jne .LBB0_1
 ; X86-NEXT:  # %bb.2: # %exit
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    movl %edi, (%eax)
 ; X86-NEXT:    movl %esi, 4(%eax)
 ; X86-NEXT:    movl %edx, 8(%eax)
-; X86-NEXT:    movl %ebx, 12(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
@@ -56,11 +51,9 @@
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    addq $1, %rax
 ; X64-NEXT:    adcq $0, %rdx
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    shldq $4, %rax, %rcx
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    shrq $60, %rsi
-; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    shrq $60, %rcx
+; X64-NEXT:    orq %rdx, %rcx
 ; X64-NEXT:    jne .LBB0_1
 ; X64-NEXT:  # %bb.2: # %exit
 ; X64-NEXT:    retq
@@ -79,30 +72,19 @@
 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_srl_eq_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:    shldl $15, %edx, %edi
-; X86-NEXT:    shldl $15, %ecx, %edx
-; X86-NEXT:    shrdl $17, %ecx, %eax
-; X86-NEXT:    orl %edi, %eax
-; X86-NEXT:    shrl $17, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $17, %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    sete %al
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_srl_eq_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    shrdq $17, %rsi, %rdi
-; X64-NEXT:    shrq $17, %rsi
-; X64-NEXT:    orq %rdi, %rsi
+; X64-NEXT:    shrq $17, %rdi
+; X64-NEXT:    orq %rsi, %rdi
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %srl = lshr i128 %a, 17
@@ -113,30 +95,19 @@
 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_srl_ne_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:    shldl $15, %edx, %edi
-; X86-NEXT:    shldl $15, %ecx, %edx
-; X86-NEXT:    shrdl $17, %ecx, %eax
-; X86-NEXT:    orl %edi, %eax
-; X86-NEXT:    shrl $17, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $17, %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    setne %al
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_srl_ne_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    shrdq $17, %rsi, %rdi
-; X64-NEXT:    shrq $17, %rsi
-; X64-NEXT:    orq %rdi, %rsi
+; X64-NEXT:    shrq $17, %rdi
+; X64-NEXT:    orq %rsi, %rdi
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %srl = lshr i128 %a, 17
@@ -147,27 +118,19 @@
 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_shl_eq_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shldl $17, %esi, %edx
-; X86-NEXT:    shldl $17, %ecx, %esi
-; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    shll $17, %eax
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $17, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    sete %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_shl_eq_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    shldq $17, %rdi, %rsi
-; X64-NEXT:    shlq $17, %rdi
-; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    shlq $17, %rsi
+; X64-NEXT:    orq %rdi, %rsi
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %shl = shl i128 %a, 17
@@ -178,27 +141,19 @@
 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_shl_ne_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shldl $17, %esi, %edx
-; X86-NEXT:    shldl $17, %ecx, %esi
-; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    shll $17, %eax
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $17, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    setne %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_shl_ne_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    shldq $17, %rdi, %rsi
-; X64-NEXT:    shlq $17, %rdi
-; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    shlq $17, %rsi
+; X64-NEXT:    orq %rdi, %rsi
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %shl = shl i128 %a, 17
@@ -262,27 +217,19 @@
 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
 ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shldl $17, %edx, %esi
-; X86-NEXT:    shldl $17, %ecx, %edx
-; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    shll $17, %eax
-; X86-NEXT:    orl %edx, %eax
-; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    shll $17, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    sete %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; X64:       # %bb.0:
-; X64-NEXT:    shldq $17, %rsi, %rdi
-; X64-NEXT:    shlq $17, %rsi
-; X64-NEXT:    orq %rdi, %rsi
+; X64-NEXT:    shlq $17, %rdi
+; X64-NEXT:    orq %rsi, %rdi
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %shl.a = shl i64 %a, 17