Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1845,6 +1845,10 @@ /// accordingly. void addToLoopUseLists(const SCEV *S); + /// Try to match the pattern generated by getURemExpr(A, B). If successful, + /// Assign A and B to LHS and RHS, respectively. + bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); + FoldingSet UniqueSCEVs; FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -1562,6 +1562,12 @@ return false; } +/// SCEV structural equivalence is usually sufficient for testing whether two +/// expressions are equal, however for the purposes of looking for a condition +/// guarding a loop, it can be useful to be a little more general, since a +/// front-end may have replicated the controlling expression. +static bool HasSameValue(const SCEV *A, const SCEV *B); + const SCEV * ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -1756,6 +1762,22 @@ } } + // zext(A % B) --> zext(A) % zext(B) + { + const SCEV *LHS; + const SCEV *RHS; + if (matchURem(Op, LHS, RHS)) { + return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1), + getZeroExtendExpr(RHS, Ty, Depth + 1)); + } + } + + // zext(A / C) --> zext(A) / zext(C). + if (auto *Div = dyn_cast(Op)) { + return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1), + getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1)); + } + if (auto *SA = dyn_cast(Op)) { // zext((A + B + ...)) --> (zext(A) + zext(B) + ...) if (SA->hasNoUnsignedWrap()) { @@ -8488,10 +8510,6 @@ return {nullptr, nullptr}; } -/// SCEV structural equivalence is usually sufficient for testing whether two -/// expressions are equal, however for the purposes of looking for a condition -/// guarding a loop, it can be useful to be a little more general, since a -/// front-end may have replicated the controlling expression. static bool HasSameValue(const SCEV *A, const SCEV *B) { // Quick check to see if they are the same SCEV. if (A == B) return true; @@ -12153,3 +12171,44 @@ OS.indent(Depth + 2) << "--> " << *II->second.second << "\n"; } } + +bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, + const SCEV *&RHS) { + const auto *Add = dyn_cast(Expr); + if (Add == nullptr || Add->getNumOperands() != 2) { + return false; + } + const auto MatchAddends = [&](const SCEV *A, const SCEV *DivOp1) { + const auto *Mul = dyn_cast(DivOp1); + if (Mul == nullptr) { + return false; + } + const auto MatchMulends = [&](const SCEV *B) { + // (SomeExpr + ((SomeExpr / B) * -B)). + const SCEV *NegB = getNegativeSCEV(B, SCEV::FlagNSW); + if (HasSameValue(Expr, getURemExpr(A, NegB))) { + LHS = A; + RHS = NegB; + return true; + } + // (SomeExpr + (-(SomeExpr / B) * B)). + if (HasSameValue(Expr, getURemExpr(A, B))) { + LHS = A; + RHS = B; + return true; + } + return false; + }; + if (Mul->getNumOperands() == 2) { + return MatchMulends(Mul->getOperand(1)) || + MatchMulends(Mul->getOperand(0)); + } + if (Mul->getNumOperands() == 3 && isa(Mul->getOperand(0))) { + return MatchMulends(Mul->getOperand(1)) || + MatchMulends(Mul->getOperand(2)); + } + return false; + }; + return MatchAddends(Add->getOperand(1), Add->getOperand(0)) || + MatchAddends(Add->getOperand(0), Add->getOperand(1)); +}; Index: llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll +++ llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s -; CHECK: Loop %bb: backedge-taken count is ((999 + (-1 * %x)) /u 3) +; CHECK: Loop %bb: backedge-taken count is ((999 + (-1 * %x)) /u 3) ; CHECK: Loop %bb: max backedge-taken count is 334 Index: llvm/test/Analysis/ScalarEvolution/strip-injective-zext.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/strip-injective-zext.ll +++ llvm/test/Analysis/ScalarEvolution/strip-injective-zext.ll @@ -9,7 +9,7 @@ ; Check that the backedge taken count was actually computed: ; CHECK: Determining loop execution counts for: @f0 -; CHECK-NEXT: Loop %b2: backedge-taken count is (-1 + (-1 * (trunc i32 %a1 to i2))) +; CHECK-NEXT: Loop %b2: backedge-taken count is (-1 + (-1 * (trunc i32 %a1 to i2))) target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" Index: llvm/test/Analysis/ScalarEvolution/trip-count14.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/trip-count14.ll +++ llvm/test/Analysis/ScalarEvolution/trip-count14.ll @@ -14,7 +14,7 @@ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times ; CHECK-LABEL: Determining loop execution counts for: @s32_max1 -; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) smax %n)) +; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) smax %n)) ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero. do.end: @@ -35,7 +35,7 @@ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times ; CHECK-LABEL: Determining loop execution counts for: @s32_max2 -; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) smax %n)) +; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) smax %n)) ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero. do.end: @@ -56,7 +56,7 @@ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times ; CHECK-LABEL: Determining loop execution counts for: @s32_maxx -; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) smax %n)) +; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) smax %n)) ; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}} do.end: @@ -81,7 +81,7 @@ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit -; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) smax %n)) + %n) umax (-1 + (-1 * %x) + %n)))) +; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) smax %n)) + %n) umax (-1 + (-1 * %x) + %n)))) ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} do.end: Index: llvm/test/Analysis/ScalarEvolution/zext-divrem.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScalarEvolution/zext-divrem.ll @@ -0,0 +1,42 @@ +; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s + +define i64 @test1(i32 %a, i32 %b) { +; CHECK-LABEL: @test1 + %div = udiv i32 %a, %b + %zext = zext i32 %div to i64 +; CHECK: %zext +; CHECK-NEXT: --> ((zext i32 %a to i64) /u (zext i32 %b to i64)) + ret i64 %zext +} + +define i64 @test2(i32 %a, i32 %b) { +; CHECK-LABEL: @test2 + %rem = urem i32 %a, %b + %zext = zext i32 %rem to i64 +; CHECK: %zext +; CHECK-NEXT: --> ((zext i32 %a to i64) + (-1 * (zext i32 %b to i64) * ((zext i32 %a to i64) /u (zext i32 %b to i64)))) + ret i64 %zext +} + +define i64 @test3(i32 %a, i32 %b) { +; CHECK-LABEL: @test3 + %div = udiv i32 %a, %b + %mul = mul i32 %div, %b + %sub = sub i32 %a, %mul + %zext = zext i32 %sub to i64 +; CHECK: %zext +; CHECK-NEXT: --> ((zext i32 %a to i64) + (-1 * (zext i32 %b to i64) * ((zext i32 %a to i64) /u (zext i32 %b to i64)))) + ret i64 %zext +} + +define i64 @test4(i32 %t) { +; CHECK-LABEL: @test4 + %a = udiv i32 %t, 2 + %div = udiv i32 %t, 112 + %mul = mul i32 %div, 56 + %sub = sub i32 %a, %mul + %zext = zext i32 %sub to i64 +; CHECK: %zext +; CHECK-NEXT: --> ((-56 * ((zext i32 %t to i64) /u 112)) + ((zext i32 %t to i64) /u 2)) + ret i64 %zext +}