Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2584,6 +2584,43 @@ return nullptr; } +/// Tries to reduce a pattern that arises when calculating the remainder of the +/// Euclidean division. When the divisor is a power of two and is guaranteed not +/// to be negative, a signed remainder can be folded with a bitwise and. +/// +/// (x % n) < 0 ? (x % n) + n : (x % n) +/// -> x & (n - 1) +static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC, + IRBuilderBase &Builder) { + Value *CondVal = SI.getCondition(); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + ICmpInst::Predicate Pred; + Value *Op, *RemRes, *Remainder; + + // Signed comparisons have been canonicalized into a ICMP_SLT at this point, + // we can safely check this type of comparison. + if (!(match(CondVal, m_ICmp(Pred, m_Value(RemRes), m_Zero())) && + Pred == ICmpInst::ICMP_SLT)) + return nullptr; + + // We are matching a quite specific pattern here: + // %rem = srem i32 %x, %n + // %cnd = icmp slt i32 %rem, 0 + // %add = add i32 %rem, %n + // %sel = select i1 %cnd, i32 %add, i32 %rem + if (!(match(TrueVal, m_Add(m_Value(RemRes), m_Value(Remainder))) && + match(RemRes, m_SRem(m_Value(Op), m_Specific(Remainder))) && + FalseVal == RemRes && + IC.isKnownToBeAPowerOfTwo(Remainder, /*OrZero*/ true))) + return nullptr; + + Value *Add = Builder.CreateAdd(Remainder, + Constant::getAllOnesValue(RemRes->getType())); + return BinaryOperator::CreateAnd(Op, Add); +} + static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) { FreezeInst *FI = dyn_cast(Sel.getCondition()); if (!FI) @@ -3655,6 +3692,9 @@ if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder)) return replaceInstUsesWith(SI, V); + if (Instruction *I = foldSelectWithSRem(SI, *this, Builder)) + return I; + // select(mask, mload(,,mask,0), 0) -> mload(,,mask,0) // Load inst is intentionally not checked for hasOneUse() if (match(FalseVal, m_Zero()) && Index: llvm/test/Transforms/InstCombine/select-divrem.ll =================================================================== --- llvm/test/Transforms/InstCombine/select-divrem.ll +++ llvm/test/Transforms/InstCombine/select-divrem.ll @@ -216,10 +216,7 @@ define i32 @rem_euclid_1(i32 %0) { ; CHECK-LABEL: @rem_euclid_1( -; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[REM]], 0 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[REM]], 8 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i32 [[ADD]], i32 [[REM]] +; CHECK-NEXT: [[SEL:%.*]] = and i32 [[TMP0:%.*]], 7 ; CHECK-NEXT: ret i32 [[SEL]] ; %rem = srem i32 %0, 8 @@ -231,10 +228,7 @@ define i32 @rem_euclid_2(i32 %0) { ; CHECK-LABEL: @rem_euclid_2( -; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[REM]], 8 -; CHECK-NEXT: [[COND1:%.*]] = icmp slt i32 [[REM]], 0 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND1]], i32 [[ADD]], i32 [[REM]] +; CHECK-NEXT: [[SEL:%.*]] = and i32 [[TMP0:%.*]], 7 ; CHECK-NEXT: ret i32 [[SEL]] ; %rem = srem i32 %0, 8 @@ -291,10 +285,7 @@ define <2 x i32> @rem_euclid_vec(<2 x i32> %0) { ; CHECK-LABEL: @rem_euclid_vec( -; CHECK-NEXT: [[REM:%.*]] = srem <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[COND:%.*]] = icmp slt <2 x i32> [[REM]], zeroinitializer -; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i32> [[REM]], -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[ADD]], <2 x i32> [[REM]] +; CHECK-NEXT: [[SEL:%.*]] = and <2 x i32> [[TMP0:%.*]], ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %rem = srem <2 x i32> %0, @@ -306,10 +297,7 @@ define i128 @rem_euclid_i128(i128 %0) { ; CHECK-LABEL: @rem_euclid_i128( -; CHECK-NEXT: [[REM:%.*]] = srem i128 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i128 [[REM]], 0 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i128 [[REM]], 8 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i128 [[ADD]], i128 [[REM]] +; CHECK-NEXT: [[SEL:%.*]] = and i128 [[TMP0:%.*]], 7 ; CHECK-NEXT: ret i128 [[SEL]] ; %rem = srem i128 %0, 8