diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1348,7 +1348,8 @@ Value *X, *Y, *Zero; if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))), m_CombineAnd(m_Zero(), m_Value(Zero))))) - return nullptr; + // try to recognize a decomposed version + return foldIRemByPowerOfTwoDecomposedToBitTest(I); if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I)) return nullptr; // This may increase instruction count, we don't enforce that Y is a constant. @@ -1357,6 +1358,41 @@ return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero); } +/// Fold decomposed version of "X % C == 0" to "X & C-1 == 0". +/// "X % C" can also be represented as "X - (X / C) * C" which is optimized +/// into "((X / -C1) >> C2)) + X" so the latter can be folded to +/// "X & C-1" for icmp eq/ne 0 +Instruction * +InstCombiner::foldIRemByPowerOfTwoDecomposedToBitTest(ICmpInst &I) { + // This fold is only valid for equality predicates. + if (!I.isEquality()) + return nullptr; + + ICmpInst::Predicate Pred; + Value *X, *Y, *Zero; + const APInt *C1, *C2; + if (!match(&I, + m_ICmp(Pred, + m_Add(m_Shl(m_SDiv(m_Value(X), m_APInt(C1)), m_APInt(C2)), + m_Value(Y)), + m_Value(Zero)))) + return nullptr; + + // C1 should be some negative power of two number + if ((X != Y) || !C1->isNegative() || !C1->abs().isPowerOf2()) + return nullptr; + + // 1 << C2 == C1 + APInt one(C2->getBitWidth(), 1); + if ((C1->abs() != one.shl(*C2)) || C2->sle(one)) + return nullptr; + + // Replace with "X & C-1 ==/!= 0" + uint64_t AndMask = C1->abs().getZExtValue() - 1; + Value *And = Builder.CreateAnd(X, ConstantInt::get(X->getType(), AndMask)); + return new ICmpInst(Pred, And, Zero); +} + /// Fold equality-comparison between zero and any (maybe truncated) right-shift /// by one-less-than-bitwidth into a sign test on the original value. Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -934,6 +934,7 @@ Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); Instruction *foldICmpEquality(ICmpInst &Cmp); Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); + Instruction *foldIRemByPowerOfTwoDecomposedToBitTest(ICmpInst &I); Instruction *foldSignBitTest(ICmpInst &I); Instruction *foldICmpWithZero(ICmpInst &Cmp); diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll --- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -38,6 +38,19 @@ ret i1 %r } +define i1 @is_rem32_pos_decomposed_i8(i8 %x) { +; CHECK-LABEL: @is_rem32_pos_i8( +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -97 +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %d = sdiv i8 %x, 32 + %m = mul nsw i8 %d, 32 + %s = sub nsw i8 %x, %m + %r = icmp eq i8 %s, 0 + ret i1 %r +} + ; i16 -32765 == 32771 == 0b1000000000000011 define i1 @is_rem4_neg_i16(i16 %x) {