Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4296,12 +4296,20 @@ if (!VT.isVector()) { // TODO: Support this for vectors. // X >= C0 --> X > (C0 - 1) + // If the constant C1 is a legal ICmp immediate (e.g. 2^n) and is not + // opaque, this transform would introduce C = C1 - 1 (e.g. 2^n - 1), + // which may not be a legal ICmp immediate. A later combine would then + // turn the constant back into 2^n for performance, so we must skip + // that case to avoid transforming back and forth forever. APInt C = C1 - 1; ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; if ((DCI.isBeforeLegalizeOps() || isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { + ((!N1C->isOpaque() && (C.getBitWidth() > 64 || + !isLegalICmpImmediate(C1.getSExtValue()) || + isLegalICmpImmediate(C.getSExtValue()))) || + (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { return DAG.getSetCC(dl, VT, N0, DAG.getConstant(C, dl, N1.getValueType()), NewCC); @@ -4316,12 +4324,20 @@ // X <= C0 --> X < (C0 + 1) if (!VT.isVector()) { // TODO: Support this for vectors. + // If the constant C1 is a legal ICmp immediate (e.g. 2^n) and is not + // opaque, this transform would introduce C = C1 + 1 (e.g. 2^n + 1), + // which may not be a legal ICmp immediate. A later combine would then + // turn the constant back into 2^n for performance, so we must skip + // that case to avoid transforming back and forth forever. APInt C = C1 + 1; ISD::CondCode NewCC = (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT; if ((DCI.isBeforeLegalizeOps() || isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { + ((!N1C->isOpaque() && (C.getBitWidth() > 64 || + !isLegalICmpImmediate(C1.getSExtValue()) || + isLegalICmpImmediate(C.getSExtValue()))) || + (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { return DAG.getSetCC(dl, VT, N0, DAG.getConstant(C, dl, N1.getValueType()), NewCC); @@ -4520,10 +4536,6 @@ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || Cond == ISD::SETULE || Cond == ISD::SETUGT) { bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); - // X < 0x100000000 -> (X >> 32) < 1 - // X >= 0x100000000 -> (X >> 32) >= 1 - // X <= 0x0ffffffff -> (X >> 32) < 1 - // X > 0x0ffffffff -> (X >> 32) >= 1 unsigned ShiftBits; APInt NewC = C1; ISD::CondCode NewCond = Cond; @@ -4538,10 +4550,25 @@ if (ShiftBits && NewC.getMinSignedBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue()) && !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { - SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0, - DAG.getConstant(ShiftBits, dl, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); - return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); + if (isLegalICmpImmediate(NewC.getSExtValue() << ShiftBits)) { + // (NewC << ShiftBits) is a legal immediate; compare X with it directly: + // (X >> ShiftBits) < NewC -> X < (NewC << ShiftBits) + // (X >> ShiftBits) >= NewC -> X >= (NewC << ShiftBits) + // ((X+1) >> ShiftBits) <= NewC -> X < (NewC << ShiftBits) + // ((X+1) >> ShiftBits) > NewC -> X >= (NewC << ShiftBits) + SDValue Shift = DAG.getConstant(NewC.getSExtValue() << ShiftBits, + dl, ShValTy); + return DAG.getSetCC(dl, VT, N0, Shift, NewCond); + } else { + // X < 0x100000000 -> (X >> 32) < 1 + // X >= 0x100000000 -> (X >> 32) >= 1 + // X <= 0x0ffffffff -> (X >> 32) < 1 + // X > 0x0ffffffff -> (X >> 32) >= 1 + SDValue Shift 
= DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); + } } } } @@ -6039,7 +6066,7 @@ Created.push_back(TautologicalInvertedChannels.getNode()); // NOTE: we avoid letting illegal types through even if we're before legalize - // ops – legalization has a hard time producing good code for this. + // ops - legalization has a hard time producing good code for this. if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) { // If we have a vector select, let's replace the comparison results in the // affected lanes with the correct tautological result. @@ -6297,7 +6324,7 @@ assert(VT.isVector() && "Can/should only get here for vectors."); // NOTE: we avoid letting illegal types through even if we're before legalize - // ops – legalization has a hard time producing good code for the code that + // ops - legalization has a hard time producing good code for the code that // follows. 
if (!isOperationLegalOrCustom(ISD::SETEQ, VT) || !isOperationLegalOrCustom(ISD::AND, VT) || Index: llvm/test/CodeGen/AArch64/icmp-shift-opt.ll =================================================================== --- llvm/test/CodeGen/AArch64/icmp-shift-opt.ll +++ llvm/test/CodeGen/AArch64/icmp-shift-opt.ll @@ -150,4 +150,28 @@ ret i1 %cmp } +; bool high_mask1 (unsigned int x) { return (x >> 13) == 0; } +define i1 @high_mask1(i32 noundef %x) { +; CHECK-LABEL: high_mask1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp w0, #2, lsl #12 // =8192 +; CHECK-NEXT: cset w0, hs +; CHECK-NEXT: ret +entry: + %cmp = icmp ugt i32 %x, 8191 + ret i1 %cmp +} + +; bool high_mask2 (unsigned int x) { return (x >> 13) != 0; } +define i1 @high_mask2(i32 noundef %x) { +; CHECK-LABEL: high_mask2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp w0, #2, lsl #12 // =8192 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret +entry: + %cmp = icmp ult i32 %x, 8192 + ret i1 %cmp +} + declare void @use(i128 %a) Index: llvm/test/CodeGen/ARM/and-cmpz.ll =================================================================== --- llvm/test/CodeGen/ARM/and-cmpz.ll +++ llvm/test/CodeGen/ARM/and-cmpz.ll @@ -93,8 +93,7 @@ ; T1-NEXT: lsrs r0, r0, #9 ; T1-NEXT: bne ; T2: uxth r0, r0 -; T2-NEXT: movs r2, #0 -; T2-NEXT: cmp.w r2, r0, lsr #9 +; T2-NEXT: cmp.w r0, #512 define void @i16_cmpz(i16 %x, void (i32)* %foo) { entry: %cmp = icmp ult i16 %x, 512 Index: llvm/test/CodeGen/ARM/consthoist-icmpimm.ll =================================================================== --- llvm/test/CodeGen/ARM/consthoist-icmpimm.ll +++ llvm/test/CodeGen/ARM/consthoist-icmpimm.ll @@ -630,22 +630,22 @@ ; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8] ; CHECKV7M-NEXT: beq .LBB6_2 ; CHECKV7M-NEXT: @ %bb.1: @ %then -; CHECKV7M-NEXT: orr.w r2, r3, r2, lsr #17 -; CHECKV7M-NEXT: orr.w r1, r1, r12, lsr #17 -; CHECKV7M-NEXT: cmp r2, #0 +; CHECKV7M-NEXT: subs.w r2, r2, #131072 +; CHECKV7M-NEXT: sbcs r2, r3, #0 ; CHECKV7M-NEXT: mov r2, r0 -; CHECKV7M-NEXT: 
it ne -; CHECKV7M-NEXT: movne r2, lr -; CHECKV7M-NEXT: cmp r1, #0 -; CHECKV7M-NEXT: it ne -; CHECKV7M-NEXT: movne r0, lr +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r2, lr +; CHECKV7M-NEXT: subs.w r3, r12, #131072 +; CHECKV7M-NEXT: sbcs r1, r1, #0 +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r0, lr ; CHECKV7M-NEXT: add r0, r2 ; CHECKV7M-NEXT: pop {r7, pc} ; CHECKV7M-NEXT: .LBB6_2: @ %else -; CHECKV7M-NEXT: orr.w r1, r3, r2, lsr #17 -; CHECKV7M-NEXT: cmp r1, #0 -; CHECKV7M-NEXT: it ne -; CHECKV7M-NEXT: movne r0, lr +; CHECKV7M-NEXT: subs.w r1, r2, #131072 +; CHECKV7M-NEXT: sbcs r1, r3, #0 +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r0, lr ; CHECKV7M-NEXT: pop {r7, pc} ; ; CHECKV7A-LABEL: icmp64_uge_m2: @@ -658,22 +658,22 @@ ; CHECKV7A-NEXT: lsls r4, r4, #31 ; CHECKV7A-NEXT: beq .LBB6_2 ; CHECKV7A-NEXT: @ %bb.1: @ %then -; CHECKV7A-NEXT: orr.w r2, r3, r2, lsr #17 -; CHECKV7A-NEXT: orr.w r1, r1, r12, lsr #17 -; CHECKV7A-NEXT: cmp r2, #0 +; CHECKV7A-NEXT: subs.w r2, r2, #131072 +; CHECKV7A-NEXT: sbcs r2, r3, #0 ; CHECKV7A-NEXT: mov r2, r0 -; CHECKV7A-NEXT: it ne -; CHECKV7A-NEXT: movne r2, lr -; CHECKV7A-NEXT: cmp r1, #0 -; CHECKV7A-NEXT: it ne -; CHECKV7A-NEXT: movne r0, lr +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r2, lr +; CHECKV7A-NEXT: subs.w r3, r12, #131072 +; CHECKV7A-NEXT: sbcs r1, r1, #0 +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r0, lr ; CHECKV7A-NEXT: add r0, r2 ; CHECKV7A-NEXT: pop {r4, pc} ; CHECKV7A-NEXT: .LBB6_2: @ %else -; CHECKV7A-NEXT: orr.w r1, r3, r2, lsr #17 -; CHECKV7A-NEXT: cmp r1, #0 -; CHECKV7A-NEXT: it ne -; CHECKV7A-NEXT: movne r0, lr +; CHECKV7A-NEXT: subs.w r1, r2, #131072 +; CHECKV7A-NEXT: sbcs r1, r3, #0 +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r0, lr ; CHECKV7A-NEXT: pop {r4, pc} br i1 %c, label %then, label %else then: Index: llvm/test/CodeGen/ARM/icmp-shift-opt.ll =================================================================== --- llvm/test/CodeGen/ARM/icmp-shift-opt.ll +++ 
llvm/test/CodeGen/ARM/icmp-shift-opt.ll @@ -12,9 +12,9 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r0, r0, #1 ; CHECK-NEXT: adc r1, r1, #0 -; CHECK-NEXT: orr r2, r1, r0, lsr #16 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: subs r2, r0, #65536 +; CHECK-NEXT: sbcs r2, r1, #0 +; CHECK-NEXT: bhs .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %exit ; CHECK-NEXT: bx lr br label %loop