diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4681,6 +4681,14 @@
                                  EVT SCCVT, SDValue N0, SDValue N1C,
                                  ISD::CondCode Cond, DAGCombinerInfo &DCI,
                                  const SDLoc &DL) const;
 
+  // Optimize a SETCC that compares an expanded SRL/SHL with zero by performing
+  // the following transformations on the SETCC's LHS:
+  // (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) --> (or (srl X, C0), Y)
+  // (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) --> (or (shl X, C1), Y)
+  SDValue optimizeSetCCOfExpandedShift(EVT SCCVT, SDValue N0, SDValue N1C,
+                                       ISD::CondCode Cond,
+                                       DAGCombinerInfo &DCI,
+                                       const SDLoc &DL) const;
+
   SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                             SDValue CompTargetNode, ISD::CondCode Cond,
                             DAGCombinerInfo &DCI, const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3390,6 +3390,71 @@
   return T2;
 }
 
+// (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) ==/!= 0
+//   --> (or (srl X, C0), Y) ==/!= 0
+// (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) ==/!= 0
+//   --> (or (shl X, C1), Y) ==/!= 0
+SDValue TargetLowering::optimizeSetCCOfExpandedShift(EVT SCCVT, SDValue N0,
+                                                     SDValue N1C,
+                                                     ISD::CondCode Cond,
+                                                     DAGCombinerInfo &DCI,
+                                                     const SDLoc &DL) const {
+  assert(isConstOrConstSplat(N1C) &&
+         isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
+         "Should be a comparison with 0.");
+  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+  if (N0.getOpcode() != ISD::OR || !N0.hasOneUse() ||
+      !N0.getOperand(0).hasOneUse() || !N0.getOperand(1).hasOneUse())
+    return SDValue();
+
+  auto Or = N0.getOperand(0);
+  auto Shift = N0.getOperand(1);
+  if (Or.getOpcode() != ISD::OR) {
+    if (Shift.getOpcode() != ISD::OR)
+      return SDValue();
+    std::swap(Or, Shift);
+  }
+  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL)
+    return SDValue();
+
+  // The nested Or's SRL and SHL arms.
+  auto OrSrl = Or.getOperand(0);
+  auto OrShl = Or.getOperand(1);
+  if (OrSrl.getOpcode() != ISD::SRL)
+    std::swap(OrSrl, OrShl);
+  if (OrSrl.getOpcode() != ISD::SRL || OrShl.getOpcode() != ISD::SHL)
+    return SDValue();
+
+  // The nested Or's arms shifting X and Y, respectively.
+  auto OrShiftX = Shift.getOpcode() == ISD::SRL ? OrSrl : OrShl;
+  auto OrShiftY = Shift.getOpcode() == ISD::SRL ? OrShl : OrSrl;
+
+  if (OrShiftY.getOperand(0) == Shift.getOperand(0)) {
+    auto OpSizeInBits = N0.getValueType().getScalarSizeInBits();
+    auto MatchConstants = [OpSizeInBits](ConstantSDNode *LHS,
+                                         ConstantSDNode *RHS) {
+      return (LHS->getAPIntValue() + RHS->getAPIntValue()) == OpSizeInBits;
+    };
+    auto SameConstant = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+      return LHS->getAPIntValue() == RHS->getAPIntValue();
+    };
+
+    auto C0 = OrSrl.getOperand(1);
+    auto C1 = OrShl.getOperand(1);
+    auto ShiftC = Shift.getOperand(1);
+    auto OrShiftXC = OrShiftX.getOperand(1);
+    if (ISD::matchBinaryPredicate(C0, C1, MatchConstants) &&
+        ISD::matchBinaryPredicate(OrShiftXC, ShiftC, SameConstant)) {
+      SelectionDAG &DAG = DCI.DAG;
+      auto NewOr = DAG.getNode(ISD::OR, DL, N0.getValueType(), OrShiftX,
+                               Shift.getOperand(0));
+      return DAG.getSetCC(DL, SCCVT, NewOr, N1C, Cond);
+    }
+  }
+  return SDValue();
+}
+
 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
 /// handle the commuted versions of these patterns.
@@ -4010,12 +4075,21 @@
   }
 
   if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
-    // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
-    if (C1.isZero())
+    if (C1.isZero()) {
+      // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
       if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
               VT, N0, N1, Cond, DCI, dl))
         return CC;
 
+      // (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) ==/!= 0
+      //   --> (or (srl X, C0), Y) ==/!= 0
+      // (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) ==/!= 0
+      //   --> (or (shl X, C1), Y) ==/!= 0
+      if (SDValue CC =
+              optimizeSetCCOfExpandedShift(VT, N0, N1, Cond, DCI, dl))
+        return CC;
+    }
+
     // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
     // For example, when high 32-bits of i64 X are known clear:
     // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/arm64-icmp-shift-opt.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-icmp-shift-opt.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
+
+; Optimize an expanded SRL/SHL used as an input of a SETCC against zero by
+; removing the rotate-like part of the expansion.
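+; For illustration (an assumed legalization shape): on AArch64 an i128 lshr
+; by 17 is expanded into 64-bit halves roughly as
+; (or (or (srl lo, 17), (shl hi, 47)), (srl hi, 17)), which the combine
+; rewrites to (or (srl lo, 17), hi) before the compare against zero.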
+;
+; See https://bugs.llvm.org/show_bug.cgi?id=50197
+define i128 @opt_setcc_lt_power_of_2(i128 %a) {
+; CHECK-LABEL: opt_setcc_lt_power_of_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:  .LBB0_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    adds x0, x0, #1
+; CHECK-NEXT:    adcs x1, x1, xzr
+; CHECK-NEXT:    orr x8, x1, x0, lsr #60
+; CHECK-NEXT:    cbnz x8, .LBB0_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    ret
+  br label %loop
+
+loop:
+  %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
+  %inc = add i128 %phi.a, 1
+  %cmp = icmp ult i128 %inc, 1152921504606846976
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret i128 %inc
+}
+
+define i1 @opt_setcc_srl_eq_zero(i128 %a) {
+; CHECK-LABEL: opt_setcc_srl_eq_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x1, x0, lsr #17
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %srl = lshr i128 %a, 17
+  %cmp = icmp eq i128 %srl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_srl_ne_zero(i128 %a) {
+; CHECK-LABEL: opt_setcc_srl_ne_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x1, x0, lsr #17
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %srl = lshr i128 %a, 17
+  %cmp = icmp ne i128 %srl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_shl_eq_zero(i128 %a) {
+; CHECK-LABEL: opt_setcc_shl_eq_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x0, x1, lsl #17
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %shl = shl i128 %a, 17
+  %cmp = icmp eq i128 %shl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_shl_ne_zero(i128 %a) {
+; CHECK-LABEL: opt_setcc_shl_ne_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x0, x1, lsl #17
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %shl = shl i128 %a, 17
+  %cmp = icmp ne i128 %shl, 0
+  ret i1 %cmp
+}
+
+; Negative test: the optimization must not be applied if the shift has multiple uses.
+define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) {
+; CHECK-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    extr x1, x1, x0, #47
+; CHECK-NEXT:    lsl x0, x0, #17
+; CHECK-NEXT:    orr x8, x0, x1
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w19, eq
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    mov w0, w19
+; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %shl = shl i128 %a, 17
+  %cmp = icmp eq i128 %shl, 0
+  call void @use(i128 %shl)
+  ret i1 %cmp
+}
+
+; Check that the optimization is applied to a DAG of the appropriate shape,
+; even when it was not produced by an actual shift expansion.
+define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) {
+; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x1, x0, lsl #17
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %shl.a = shl i64 %a, 17
+  %srl.b = lshr i64 %b, 47
+  %or.0 = or i64 %shl.a, %srl.b
+  %shl.b = shl i64 %b, 17
+  %or.1 = or i64 %or.0, %shl.b
+  %cmp = icmp eq i64 %or.1, 0
+  ret i1 %cmp
+}
+
+; Negative test: the optimization must not be applied because the
+; shift constants do not match.
+define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) {
+; CHECK-LABEL: opt_setcc_expanded_shl_wrong_shifts:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr x8, x0, x1, #47
+; CHECK-NEXT:    orr x8, x8, x1, lsl #18
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %shl.a = shl i64 %a, 17
+  %srl.b = lshr i64 %b, 47
+  %or.0 = or i64 %shl.a, %srl.b
+  %shl.b = shl i64 %b, 18
+  %or.1 = or i64 %or.0, %shl.b
+  %cmp = icmp eq i64 %or.1, 0
+  ret i1 %cmp
+}
+
+declare void @use(i128 %a)
diff --git a/llvm/test/CodeGen/ARM/arm-icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/arm-icmp-shift-opt.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/arm-icmp-shift-opt.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s
+
+; Optimize an expanded SRL/SHL used as an input of a SETCC against zero by
+; removing the rotate-like part of the expansion.
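+; For illustration (an assumed expansion shape): on this 32-bit target an
+; i64 lshr by 17 becomes (or (or (srl lo, 17), (shl hi, 15)), (srl hi, 17)),
+; which the combine rewrites to (or (srl lo, 17), hi).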
+;
+; See https://bugs.llvm.org/show_bug.cgi?id=50197
+define i64 @opt_setcc_lt_power_of_2(i64 %a) {
+; CHECK-LABEL: opt_setcc_lt_power_of_2:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:  .LBB0_1: @ %loop
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    adds r0, r0, #1
+; CHECK-NEXT:    adc r1, r1, #0
+; CHECK-NEXT:    orr r2, r1, r0, lsr #16
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    bne .LBB0_1
+; CHECK-NEXT:  @ %bb.2: @ %exit
+; CHECK-NEXT:    bx lr
+  br label %loop
+
+loop:
+  %phi.a = phi i64 [ %a, %0 ], [ %inc, %loop ]
+  %inc = add i64 %phi.a, 1
+  %cmp = icmp ult i64 %inc, 65536
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret i64 %inc
+}
+
+define i1 @opt_setcc_srl_eq_zero(i64 %a) {
+; CHECK-LABEL: opt_setcc_srl_eq_zero:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    orr r0, r1, r0, lsr #17
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsr r0, r0, #5
+; CHECK-NEXT:    bx lr
+  %srl = lshr i64 %a, 17
+  %cmp = icmp eq i64 %srl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_srl_ne_zero(i64 %a) {
+; CHECK-LABEL: opt_setcc_srl_ne_zero:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    orr r0, r1, r0, lsr #17
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    movwne r0, #1
+; CHECK-NEXT:    bx lr
+  %srl = lshr i64 %a, 17
+  %cmp = icmp ne i64 %srl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_shl_eq_zero(i64 %a) {
+; CHECK-LABEL: opt_setcc_shl_eq_zero:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    orr r0, r0, r1, lsl #17
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsr r0, r0, #5
+; CHECK-NEXT:    bx lr
+  %shl = shl i64 %a, 17
+  %cmp = icmp eq i64 %shl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_shl_ne_zero(i64 %a) {
+; CHECK-LABEL: opt_setcc_shl_ne_zero:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    orr r0, r0, r1, lsl #17
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    movwne r0, #1
+; CHECK-NEXT:    bx lr
+  %shl = shl i64 %a, 17
+  %cmp = icmp ne i64 %shl, 0
+  ret i1 %cmp
+}
+
+; Negative test: the optimization must not be applied if the shift has multiple uses.
+define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i64 %a) {
+; CHECK-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, r5, r11, lr}
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    lsl r0, r1, #17
+; CHECK-NEXT:    orr r5, r0, r4, lsr #15
+; CHECK-NEXT:    lsl r0, r4, #17
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    orr r0, r5, r4, lsl #17
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsr r0, r0, #5
+; CHECK-NEXT:    pop {r4, r5, r11, pc}
+  %shl = shl i64 %a, 17
+  %cmp = icmp eq i64 %shl, 0
+  call void @use(i64 %shl)
+  ret i1 %cmp
+}
+
+; Check that the optimization is applied to a DAG of the appropriate shape,
+; even when it was not produced by an actual shift expansion.
+define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) {
+; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    orr r0, r1, r0, lsl #17
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsr r0, r0, #5
+; CHECK-NEXT:    bx lr
+  %shl.a = shl i32 %a, 17
+  %srl.b = lshr i32 %b, 15
+  %or.0 = or i32 %shl.a, %srl.b
+  %shl.b = shl i32 %b, 17
+  %or.1 = or i32 %or.0, %shl.b
+  %cmp = icmp eq i32 %or.1, 0
+  ret i1 %cmp
+}
+
+; Negative test: the optimization must not be applied because the
+; shift constants do not match.
+define i1 @opt_setcc_expanded_shl_wrong_shifts(i32 %a, i32 %b) {
+; CHECK-LABEL: opt_setcc_expanded_shl_wrong_shifts:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    lsl r2, r0, #17
+; CHECK-NEXT:    orr r2, r2, r1, lsr #15
+; CHECK-NEXT:    orr r0, r2, r1, lsl #18
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsr r0, r0, #5
+; CHECK-NEXT:    bx lr
+  %shl.a = shl i32 %a, 17
+  %srl.b = lshr i32 %b, 15
+  %or.0 = or i32 %shl.a, %srl.b
+  %shl.b = shl i32 %b, 18
+  %or.1 = or i32 %or.0, %shl.b
+  %cmp = icmp eq i32 %or.1, 0
+  ret i1 %cmp
+}
+
+declare void @use(i64 %a)
diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
--- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
+++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
@@ -630,14 +630,10 @@
 ; CHECKV7M-NEXT:    ldrd lr, r0, [sp, #8]
 ; CHECKV7M-NEXT:    beq .LBB6_2
 ; CHECKV7M-NEXT:  @ %bb.1: @ %then
-; CHECKV7M-NEXT:    lsrs r2, r2, #17
-; CHECKV7M-NEXT:    orr.w r2, r2, r3, lsl #15
-; CHECKV7M-NEXT:    orr.w r2, r2, r3, lsr #17
-; CHECKV7M-NEXT:    lsr.w r3, r12, #17
-; CHECKV7M-NEXT:    orr.w r3, r3, r1, lsl #15
+; CHECKV7M-NEXT:    orr.w r2, r3, r2, lsr #17
+; CHECKV7M-NEXT:    orr.w r1, r1, r12, lsr #17
 ; CHECKV7M-NEXT:    cmp r2, #0
 ; CHECKV7M-NEXT:    mov r2, r0
-; CHECKV7M-NEXT:    orr.w r1, r3, r1, lsr #17
 ; CHECKV7M-NEXT:    it ne
 ; CHECKV7M-NEXT:    movne r2, lr
 ; CHECKV7M-NEXT:    cmp r1, #0
@@ -646,9 +642,7 @@
 ; CHECKV7M-NEXT:    add r0, r2
 ; CHECKV7M-NEXT:    pop {r7, pc}
 ; CHECKV7M-NEXT:  .LBB6_2: @ %else
-; CHECKV7M-NEXT:    lsrs r1, r2, #17
-; CHECKV7M-NEXT:    orr.w r1, r1, r3, lsl #15
-; CHECKV7M-NEXT:    orr.w r1, r1, r3, lsr #17
+; CHECKV7M-NEXT:    orr.w r1, r3, r2, lsr #17
 ; CHECKV7M-NEXT:    cmp r1, #0
 ; CHECKV7M-NEXT:    it ne
 ; CHECKV7M-NEXT:    movne r0, lr
@@ -664,14 +658,10 @@
 ; CHECKV7A-NEXT:    lsls r4, r4, #31
 ; CHECKV7A-NEXT:    beq .LBB6_2
 ; CHECKV7A-NEXT:  @ %bb.1: @ %then
-; CHECKV7A-NEXT:    lsrs r2, r2, #17
-; CHECKV7A-NEXT:    orr.w r2, r2, r3, lsl #15
-; CHECKV7A-NEXT:    orr.w r2, r2, r3, lsr #17
-; CHECKV7A-NEXT:    lsr.w r3, r12, #17
-; CHECKV7A-NEXT:    orr.w r3, r3, r1, lsl #15
+; CHECKV7A-NEXT:    orr.w r2, r3, r2, lsr #17
+; CHECKV7A-NEXT:    orr.w r1, r1, r12, lsr #17
 ; CHECKV7A-NEXT:    cmp r2, #0
 ; CHECKV7A-NEXT:    mov r2, r0
-; CHECKV7A-NEXT:    orr.w r1, r3, r1, lsr #17
 ; CHECKV7A-NEXT:    it ne
 ; CHECKV7A-NEXT:    movne r2, lr
 ; CHECKV7A-NEXT:    cmp r1, #0
@@ -680,9 +670,7 @@
 ; CHECKV7A-NEXT:    add r0, r2
 ; CHECKV7A-NEXT:    pop {r4, pc}
 ; CHECKV7A-NEXT:  .LBB6_2: @ %else
-; CHECKV7A-NEXT:    lsrs r1, r2, #17
-; CHECKV7A-NEXT:    orr.w r1, r1, r3, lsl #15
-; CHECKV7A-NEXT:    orr.w r1, r1, r3, lsr #17
+; CHECKV7A-NEXT:    orr.w r1, r3, r2, lsr #17
 ; CHECKV7A-NEXT:    cmp r1, #0
 ; CHECKV7A-NEXT:    it ne
 ; CHECKV7A-NEXT:    movne r0, lr
diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; Optimize an expanded SRL/SHL used as an input of a SETCC against zero by
+; removing the rotate-like part of the expansion.
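+; For illustration (an assumed expansion shape): an i128 lshr by 17 is split
+; into 64-bit halves as (or (or (srl lo, 17), (shl hi, 47)), (srl hi, 17)),
+; which the combine rewrites to (or (srl lo, 17), hi).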
+;
+; See https://bugs.llvm.org/show_bug.cgi?id=50197
+define i128 @opt_setcc_lt_power_of_2(i128 %a) {
+; CHECK-LABEL: opt_setcc_lt_power_of_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_1: # %loop
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    addq $1, %rax
+; CHECK-NEXT:    adcq $0, %rdx
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    shrq $60, %rcx
+; CHECK-NEXT:    orq %rdx, %rcx
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    retq
+  br label %loop
+
+loop:
+  %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
+  %inc = add i128 %phi.a, 1
+  %cmp = icmp ult i128 %inc, 1152921504606846976
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret i128 %inc
+}
+
+define i1 @opt_setcc_srl_eq_zero(i128 %a) {
+; CHECK-LABEL: opt_setcc_srl_eq_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shrq $17, %rdi
+; CHECK-NEXT:    orq %rsi, %rdi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+  %srl = lshr i128 %a, 17
+  %cmp = icmp eq i128 %srl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_srl_ne_zero(i128 %a) {
+; CHECK-LABEL: opt_setcc_srl_ne_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shrq $17, %rdi
+; CHECK-NEXT:    orq %rsi, %rdi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %srl = lshr i128 %a, 17
+  %cmp = icmp ne i128 %srl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_shl_eq_zero(i128 %a) {
+; CHECK-LABEL: opt_setcc_shl_eq_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shlq $17, %rsi
+; CHECK-NEXT:    orq %rdi, %rsi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+  %shl = shl i128 %a, 17
+  %cmp = icmp eq i128 %shl, 0
+  ret i1 %cmp
+}
+
+define i1 @opt_setcc_shl_ne_zero(i128 %a) {
+; CHECK-LABEL: opt_setcc_shl_ne_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shlq $17, %rsi
+; CHECK-NEXT:    orq %rdi, %rsi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %shl = shl i128 %a, 17
+  %cmp = icmp ne i128 %shl, 0
+  ret i1 %cmp
+}
+
+; Negative test: the optimization must not be applied if the shift has multiple uses.
+define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) {
+; CHECK-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    shldq $17, %rdi, %rsi
+; CHECK-NEXT:    shlq $17, %rdi
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    sete %bl
+; CHECK-NEXT:    callq use@PLT
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %shl = shl i128 %a, 17
+  %cmp = icmp eq i128 %shl, 0
+  call void @use(i128 %shl)
+  ret i1 %cmp
+}
+
+; Check that the optimization is applied to a DAG of the appropriate shape,
+; even when it was not produced by an actual shift expansion.
+define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) {
+; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shlq $17, %rdi
+; CHECK-NEXT:    orq %rsi, %rdi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+  %shl.a = shl i64 %a, 17
+  %srl.b = lshr i64 %b, 47
+  %or.0 = or i64 %shl.a, %srl.b
+  %shl.b = shl i64 %b, 17
+  %or.1 = or i64 %or.0, %shl.b
+  %cmp = icmp eq i64 %or.1, 0
+  ret i1 %cmp
+}
+
+; Negative test: the optimization must not be applied because the
+; shift constants do not match.
+define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) {
+; CHECK-LABEL: opt_setcc_expanded_shl_wrong_shifts:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shldq $17, %rsi, %rdi
+; CHECK-NEXT:    shlq $18, %rsi
+; CHECK-NEXT:    orq %rdi, %rsi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+  %shl.a = shl i64 %a, 17
+  %srl.b = lshr i64 %b, 47
+  %or.0 = or i64 %shl.a, %srl.b
+  %shl.b = shl i64 %b, 18
+  %or.1 = or i64 %or.0, %shl.b
+  %cmp = icmp eq i64 %or.1, 0
+  ret i1 %cmp
+}
+
+declare void @use(i128 %a)
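
Correctness note: when C0 + C1 equals the bit width, (shl Y, C1) and
(srl Y, C0) are simultaneously zero exactly when Y itself is zero, so folding
both arms into a plain Y preserves the ==/!= 0 result. Below is a minimal
standalone sanity check (not part of the patch; plain C++ with hypothetical
names) that brute-forces both forms of the identity over all 8-bit values:

// Brute-force check of the two equivalences used by
// optimizeSetCCOfExpandedShift, specialized to 8 bits. The constraint
// C0 + C1 == 8 mirrors the MatchConstants predicate in the patch.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned BW = 8;
  for (unsigned C0 = 1; C0 < BW; ++C0) {
    const unsigned C1 = BW - C0;
    for (unsigned X = 0; X < 256; ++X) {
      for (unsigned Y = 0; Y < 256; ++Y) {
        const uint8_t x = X, y = Y;
        // (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) ==/!= 0
        //   --> (or (srl X, C0), Y) ==/!= 0
        const uint8_t SrlForm = (x >> C0) | uint8_t(y << C1) | (y >> C0);
        const uint8_t SrlOpt = (x >> C0) | y;
        assert((SrlForm == 0) == (SrlOpt == 0));
        // (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) ==/!= 0
        //   --> (or (shl X, C1), Y) ==/!= 0
        const uint8_t ShlForm = (y >> C0) | uint8_t(x << C1) | uint8_t(y << C1);
        const uint8_t ShlOpt = uint8_t(x << C1) | y;
        assert((ShlForm == 0) == (ShlOpt == 0));
      }
    }
  }
}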