Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2584,6 +2584,41 @@ return nullptr; } +/// Tries to reduce a pattern that arises when calculating the remainder of the +/// Euclidean division. When the divisor is a power of two and is guaranteed not +/// to be negative, a signed remainder can be folded with a bitwise and. +/// +/// (x % n) < 0 ? (x % n) + n : (x % n) +/// -> x & (n - 1) +static Instruction *foldSelectWithSRem(SelectInst &SI) { + Value *CondVal = SI.getCondition(); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + ICmpInst::Predicate Pred; + Value *Op, *Rem, *CmpCond; + + // We are matching a quite specific pattern here: + // %rem = srem i32 %x, %n + // %cnd = icmp slt i32 %rem, 0 + // %add = add nsw i32 %rem, %n + // %sel = select i1 %cnd, i32 %add, i32 %rem + const APInt *I0, *I1; + if (!(match(TrueVal, m_Add(m_Value(Rem), m_Power2(I0))) && + match(Rem, m_SRem(m_Value(Op), m_Power2(I1))) && FalseVal == Rem && + I0 == I1)) + return nullptr; + + // Signed comparisons have been canonicalized into a ICMP_SLT at this point, + // we can safely check this type of comparison. + if (!(match(CondVal, m_ICmp(Pred, m_Value(CmpCond), m_Zero())) && + Pred == ICmpInst::ICMP_SLT && CmpCond == Rem)) + return nullptr; + + Value *Mask = ConstantInt::get(Rem->getType(), I0->getZExtValue() - 1); + return BinaryOperator::CreateAnd(Op, Mask); +} + static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) { FreezeInst *FI = dyn_cast(Sel.getCondition()); if (!FI) @@ -3616,6 +3651,9 @@ if (Instruction *I = foldVectorSelect(SI)) return I; + if (Instruction *I = foldSelectWithSRem(SI)) + return I; + // If we can compute the condition, there's no need for a select. // Like the above fold, we are attempting to reduce compile-time cost by // putting this fold here with limitations rather than in InstSimplify. Index: llvm/test/Transforms/InstCombine/select-divrem.ll =================================================================== --- llvm/test/Transforms/InstCombine/select-divrem.ll +++ llvm/test/Transforms/InstCombine/select-divrem.ll @@ -213,3 +213,27 @@ %sel = select i1 %b, i5 %r2, i5 %r1 ret i5 %sel } + +define i32 @rem_euclid_1(i32 %0) { +; CHECK-LABEL: @rem_euclid_1( +; CHECK-NEXT: [[SEL:%.*]] = and i32 [[TMP0:%.*]], 7 +; CHECK-NEXT: ret i32 [[SEL]] +; + %rem = srem i32 %0, 8 + %cond = icmp slt i32 %rem, 0 + %add = add nsw i32 %rem, 8 + %sel = select i1 %cond, i32 %add, i32 %rem + ret i32 %sel +} + +define i32 @rem_euclid_2(i32 %0) { +; CHECK-LABEL: @rem_euclid_2( +; CHECK-NEXT: [[SEL:%.*]] = and i32 [[TMP0:%.*]], 7 +; CHECK-NEXT: ret i32 [[SEL]] +; + %rem = srem i32 %0, 8 + %cond = icmp sgt i32 %rem, -1 + %add = add nsw i32 %rem, 8 + %sel = select i1 %cond, i32 %rem, i32 %add + ret i32 %sel +}