diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4676,6 +4676,14 @@
       EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
       DAGCombinerInfo &DCI, const SDLoc &DL) const;
 
+  // Optimize an ==/!= comparison of an expanded SRL/SHL with zero by applying
+  // one of these rewrites to the SETCC's LHS (requires C0 + C1 == bit width):
+  // (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) --> (or (srl X, C0), Y)
+  // (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) --> (or (shl X, C1), Y)
+  SDValue optimizeSetCCOfExpandedShift(EVT SCCVT, SDValue N0, SDValue N1C,
+                                       ISD::CondCode Cond, DAGCombinerInfo &DCI,
+                                       const SDLoc &DL) const;
+
   SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                             SDValue CompTargetNode, ISD::CondCode Cond,
                             DAGCombinerInfo &DCI, const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3390,6 +3390,71 @@
   return T2;
 }
 
+// Fold a SETEQ/SETNE against zero. With C0 + C1 == bit width, the two shifted
+// pieces of Y are all-zero exactly when Y is, so test Y itself instead:
+//   (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) --> (or (srl X, C0), Y)
+//   (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) --> (or (shl X, C1), Y)
+SDValue TargetLowering::optimizeSetCCOfExpandedShift(EVT SCCVT, SDValue N0,
+                                                     SDValue N1C,
+                                                     ISD::CondCode Cond,
+                                                     DAGCombinerInfo &DCI,
+                                                     const SDLoc &DL) const {
+  assert(isConstOrConstSplat(N1C) &&
+         isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
+         "Should be a comparison with 0.");
+  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+  if (N0.getOpcode() != ISD::OR || !N0.hasOneUse() ||
+      !N0.getOperand(0).hasOneUse() || !N0.getOperand(1).hasOneUse())
+    return SDValue();
+
+  SDValue Or = N0.getOperand(0);
+  SDValue Shift = N0.getOperand(1);
+  if (Or.getOpcode() != ISD::OR) {
+    if (Shift.getOpcode() != ISD::OR)
+      return SDValue();
+    std::swap(Or, Shift); // The nested OR was N0's second operand.
+  }
+  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL)
+    return SDValue();
+
+  // The nested OR must combine one SRL and one SHL, in either operand order.
+  SDValue OrSrl = Or.getOperand(0);
+  SDValue OrShl = Or.getOperand(1);
+  if (OrSrl.getOpcode() != ISD::SRL)
+    std::swap(OrSrl, OrShl);
+  if (OrSrl.getOpcode() != ISD::SRL || OrShl.getOpcode() != ISD::SHL)
+    return SDValue();
+
+  // OrShiftX shares the lone shift's opcode; OrShiftY is the opposite arm.
+  SDValue OrShiftX = Shift.getOpcode() == ISD::SRL ? OrSrl : OrShl;
+  SDValue OrShiftY = Shift.getOpcode() == ISD::SRL ? OrShl : OrSrl;
+
+  if (OrShiftY.getOperand(0) == Shift.getOperand(0)) { // Same value (Y).
+    unsigned OpSizeInBits = N0.getValueType().getScalarSizeInBits();
+    auto MatchConstants = [OpSizeInBits](ConstantSDNode *LHS,
+                                         ConstantSDNode *RHS) {
+      return (LHS->getAPIntValue() + RHS->getAPIntValue()) == OpSizeInBits;
+    };
+    auto SameConstant = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+      return LHS->getAPIntValue() == RHS->getAPIntValue();
+    };
+
+    SDValue C0 = OrSrl.getOperand(1); // SRL shift amount.
+    SDValue C1 = OrShl.getOperand(1); // SHL shift amount.
+    SDValue ShiftC = Shift.getOperand(1);
+    SDValue OrShiftXC = OrShiftX.getOperand(1);
+    if (ISD::matchBinaryPredicate(C0, C1, MatchConstants) &&
+        ISD::matchBinaryPredicate(OrShiftXC, ShiftC, SameConstant)) {
+      SelectionDAG &DAG = DCI.DAG;
+      SDValue NewOr = DAG.getNode(ISD::OR, DL, N0.getValueType(), OrShiftX,
+                                  Shift.getOperand(0));
+      return DAG.getSetCC(DL, SCCVT, NewOr, N1C, Cond);
+    }
+  }
+  return SDValue(); // No match.
+}
+
 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
 /// handle the commuted versions of these patterns.
@@ -4010,12 +4075,21 @@ } if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { - // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 - if (C1.isZero()) + if (C1.isZero()) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( VT, N0, N1, Cond, DCI, dl)) return CC; + // (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) ==/!= 0 + // --> (or (srl X, C0), Y) ==/!= 0 + // (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) ==/!= 0 + // --> (or (shl X, C1), Y) ==/!= 0 + if (SDValue CC = + optimizeSetCCOfExpandedShift(VT, N0, N1, Cond, DCI, dl)) + return CC; + } + // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y). // For example, when high 32-bits of i64 X are known clear: // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 diff --git a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll --- a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll @@ -12,8 +12,7 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds x0, x0, #1 ; CHECK-NEXT: adcs x1, x1, xzr -; CHECK-NEXT: extr x8, x1, x0, #60 -; CHECK-NEXT: orr x8, x8, x1, lsr #60 +; CHECK-NEXT: orr x8, x1, x0, lsr #60 ; CHECK-NEXT: cbnz x8, .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit ; CHECK-NEXT: ret @@ -32,8 +31,7 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_srl_eq_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: extr x8, x1, x0, #17 -; CHECK-NEXT: orr x8, x8, x1, lsr #17 +; CHECK-NEXT: orr x8, x1, x0, lsr #17 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -45,8 +43,7 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_srl_ne_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: extr x8, x1, x0, #17 -; CHECK-NEXT: orr x8, x8, x1, lsr #17 +; CHECK-NEXT: orr x8, x1, x0, lsr #17 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret @@ -58,8 +55,7 @@ define i1 
@opt_setcc_shl_eq_zero(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_eq_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: extr x8, x1, x0, #47 -; CHECK-NEXT: orr x8, x8, x0, lsl #17 +; CHECK-NEXT: orr x8, x0, x1, lsl #17 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -71,8 +67,7 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_ne_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: extr x8, x1, x0, #47 -; CHECK-NEXT: orr x8, x8, x0, lsl #17 +; CHECK-NEXT: orr x8, x0, x1, lsl #17 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret @@ -106,8 +101,7 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts: ; CHECK: // %bb.0: -; CHECK-NEXT: extr x8, x0, x1, #47 -; CHECK-NEXT: orr x8, x8, x1, lsl #17 +; CHECK-NEXT: orr x8, x1, x0, lsl #17 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll --- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll +++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll @@ -630,14 +630,10 @@ ; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8] ; CHECKV7M-NEXT: beq .LBB6_2 ; CHECKV7M-NEXT: @ %bb.1: @ %then -; CHECKV7M-NEXT: lsrs r2, r2, #17 -; CHECKV7M-NEXT: orr.w r2, r2, r3, lsl #15 -; CHECKV7M-NEXT: orr.w r2, r2, r3, lsr #17 -; CHECKV7M-NEXT: lsr.w r3, r12, #17 -; CHECKV7M-NEXT: orr.w r3, r3, r1, lsl #15 +; CHECKV7M-NEXT: orr.w r2, r3, r2, lsr #17 +; CHECKV7M-NEXT: orr.w r1, r1, r12, lsr #17 ; CHECKV7M-NEXT: cmp r2, #0 ; CHECKV7M-NEXT: mov r2, r0 -; CHECKV7M-NEXT: orr.w r1, r3, r1, lsr #17 ; CHECKV7M-NEXT: it ne ; CHECKV7M-NEXT: movne r2, lr ; CHECKV7M-NEXT: cmp r1, #0 @@ -646,9 +642,7 @@ ; CHECKV7M-NEXT: add r0, r2 ; CHECKV7M-NEXT: pop {r7, pc} ; CHECKV7M-NEXT: .LBB6_2: @ %else -; CHECKV7M-NEXT: lsrs r1, r2, #17 -; CHECKV7M-NEXT: orr.w r1, r1, r3, lsl #15 -; CHECKV7M-NEXT: orr.w r1, r1, r3, lsr #17 +; CHECKV7M-NEXT: 
orr.w r1, r3, r2, lsr #17 ; CHECKV7M-NEXT: cmp r1, #0 ; CHECKV7M-NEXT: it ne ; CHECKV7M-NEXT: movne r0, lr @@ -664,14 +658,10 @@ ; CHECKV7A-NEXT: lsls r4, r4, #31 ; CHECKV7A-NEXT: beq .LBB6_2 ; CHECKV7A-NEXT: @ %bb.1: @ %then -; CHECKV7A-NEXT: lsrs r2, r2, #17 -; CHECKV7A-NEXT: orr.w r2, r2, r3, lsl #15 -; CHECKV7A-NEXT: orr.w r2, r2, r3, lsr #17 -; CHECKV7A-NEXT: lsr.w r3, r12, #17 -; CHECKV7A-NEXT: orr.w r3, r3, r1, lsl #15 +; CHECKV7A-NEXT: orr.w r2, r3, r2, lsr #17 +; CHECKV7A-NEXT: orr.w r1, r1, r12, lsr #17 ; CHECKV7A-NEXT: cmp r2, #0 ; CHECKV7A-NEXT: mov r2, r0 -; CHECKV7A-NEXT: orr.w r1, r3, r1, lsr #17 ; CHECKV7A-NEXT: it ne ; CHECKV7A-NEXT: movne r2, lr ; CHECKV7A-NEXT: cmp r1, #0 @@ -680,9 +670,7 @@ ; CHECKV7A-NEXT: add r0, r2 ; CHECKV7A-NEXT: pop {r4, pc} ; CHECKV7A-NEXT: .LBB6_2: @ %else -; CHECKV7A-NEXT: lsrs r1, r2, #17 -; CHECKV7A-NEXT: orr.w r1, r1, r3, lsl #15 -; CHECKV7A-NEXT: orr.w r1, r1, r3, lsr #17 +; CHECKV7A-NEXT: orr.w r1, r3, r2, lsr #17 ; CHECKV7A-NEXT: cmp r1, #0 ; CHECKV7A-NEXT: it ne ; CHECKV7A-NEXT: movne r0, lr diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll --- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll @@ -12,9 +12,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r0, r0, #1 ; CHECK-NEXT: adc r1, r1, #0 -; CHECK-NEXT: lsr r2, r0, #16 -; CHECK-NEXT: orr r2, r2, r1, lsl #16 -; CHECK-NEXT: orr r2, r2, r1, lsr #16 +; CHECK-NEXT: orr r2, r1, r0, lsr #16 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: bne .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %exit @@ -34,9 +32,7 @@ define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind { ; CHECK-LABEL: opt_setcc_srl_eq_zero: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsr r0, r0, #17 -; CHECK-NEXT: orr r0, r0, r1, lsl #15 -; CHECK-NEXT: orr r0, r0, r1, lsr #17 +; CHECK-NEXT: orr r0, r1, r0, lsr #17 ; CHECK-NEXT: clz r0, r0 ; CHECK-NEXT: lsr r0, r0, #5 ; CHECK-NEXT: bx lr @@ -48,9 +44,7 @@ define i1 
@opt_setcc_srl_ne_zero(i64 %a) nounwind { ; CHECK-LABEL: opt_setcc_srl_ne_zero: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsr r0, r0, #17 -; CHECK-NEXT: orr r0, r0, r1, lsl #15 -; CHECK-NEXT: orr r0, r0, r1, lsr #17 +; CHECK-NEXT: orr r0, r1, r0, lsr #17 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movwne r0, #1 ; CHECK-NEXT: bx lr @@ -62,9 +56,7 @@ define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_eq_zero: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsl r1, r1, #17 -; CHECK-NEXT: orr r1, r1, r0, lsr #15 -; CHECK-NEXT: orr r0, r1, r0, lsl #17 +; CHECK-NEXT: orr r0, r0, r1, lsl #17 ; CHECK-NEXT: clz r0, r0 ; CHECK-NEXT: lsr r0, r0, #5 ; CHECK-NEXT: bx lr @@ -76,9 +68,7 @@ define i1 @opt_setcc_shl_ne_zero(i64 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_ne_zero: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsl r1, r1, #17 -; CHECK-NEXT: orr r1, r1, r0, lsr #15 -; CHECK-NEXT: orr r0, r1, r0, lsl #17 +; CHECK-NEXT: orr r0, r0, r1, lsl #17 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movwne r0, #1 ; CHECK-NEXT: bx lr @@ -113,9 +103,7 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsl r0, r0, #17 -; CHECK-NEXT: orr r0, r0, r1, lsr #15 -; CHECK-NEXT: orr r0, r0, r1, lsl #17 +; CHECK-NEXT: orr r0, r1, r0, lsl #17 ; CHECK-NEXT: clz r0, r0 ; CHECK-NEXT: lsr r0, r0, #5 ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll --- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll @@ -56,11 +56,9 @@ ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: addq $1, %rax ; X64-NEXT: adcq $0, %rdx -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: shldq $4, %rax, %rcx -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: shrq $60, %rsi -; X64-NEXT: orq %rcx, %rsi +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: shrq $60, %rcx +; X64-NEXT: orq %rdx, %rcx ; X64-NEXT: jne .LBB0_1 ; X64-NEXT: # %bb.2: # %exit ; 
X64-NEXT: retq @@ -100,9 +98,8 @@ ; ; X64-LABEL: opt_setcc_srl_eq_zero: ; X64: # %bb.0: -; X64-NEXT: shrdq $17, %rsi, %rdi -; X64-NEXT: shrq $17, %rsi -; X64-NEXT: orq %rdi, %rsi +; X64-NEXT: shrq $17, %rdi +; X64-NEXT: orq %rsi, %rdi ; X64-NEXT: sete %al ; X64-NEXT: retq %srl = lshr i128 %a, 17 @@ -134,9 +131,8 @@ ; ; X64-LABEL: opt_setcc_srl_ne_zero: ; X64: # %bb.0: -; X64-NEXT: shrdq $17, %rsi, %rdi -; X64-NEXT: shrq $17, %rsi -; X64-NEXT: orq %rdi, %rsi +; X64-NEXT: shrq $17, %rdi +; X64-NEXT: orq %rsi, %rdi ; X64-NEXT: setne %al ; X64-NEXT: retq %srl = lshr i128 %a, 17 @@ -165,9 +161,8 @@ ; ; X64-LABEL: opt_setcc_shl_eq_zero: ; X64: # %bb.0: -; X64-NEXT: shldq $17, %rdi, %rsi -; X64-NEXT: shlq $17, %rdi -; X64-NEXT: orq %rsi, %rdi +; X64-NEXT: shlq $17, %rsi +; X64-NEXT: orq %rdi, %rsi ; X64-NEXT: sete %al ; X64-NEXT: retq %shl = shl i128 %a, 17 @@ -196,9 +191,8 @@ ; ; X64-LABEL: opt_setcc_shl_ne_zero: ; X64: # %bb.0: -; X64-NEXT: shldq $17, %rdi, %rsi -; X64-NEXT: shlq $17, %rdi -; X64-NEXT: orq %rsi, %rdi +; X64-NEXT: shlq $17, %rsi +; X64-NEXT: orq %rdi, %rsi ; X64-NEXT: setne %al ; X64-NEXT: retq %shl = shl i128 %a, 17 @@ -262,27 +256,20 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind { ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl $17, %edx, %esi -; X86-NEXT: shldl $17, %ecx, %edx ; X86-NEXT: shldl $17, %eax, %ecx ; X86-NEXT: shll $17, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: orl %eax, %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %ecx, %eax ; X86-NEXT: sete %al -; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts: ; X64: # %bb.0: -; X64-NEXT: shldq $17, %rsi, %rdi -; X64-NEXT: 
shlq $17, %rsi -; X64-NEXT: orq %rdi, %rsi +; X64-NEXT: shlq $17, %rdi +; X64-NEXT: orq %rsi, %rdi ; X64-NEXT: sete %al ; X64-NEXT: retq %shl.a = shl i64 %a, 17