Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h @@ -1845,6 +1845,10 @@ /// accordingly. void addToLoopUseLists(const SCEV *S); + /// Try to match the pattern generated by getURemExpr(A, B). If successful, + /// Assign A and B to LHS and RHS, respectively. + bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); + FoldingSet UniqueSCEVs; FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -1756,6 +1756,20 @@ } } + // zext(A % B) --> zext(A) % zext(B) + { + const SCEV *LHS; + const SCEV *RHS; + if (matchURem(Op, LHS, RHS)) + return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1), + getZeroExtendExpr(RHS, Ty, Depth + 1)); + } + + // zext(A / B) --> zext(A) / zext(B). + if (auto *Div = dyn_cast(Op)) + return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1), + getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1)); + if (auto *SA = dyn_cast(Op)) { // zext((A + B + ...)) --> (zext(A) + zext(B) + ...) if (SA->hasNoUnsignedWrap()) { @@ -12153,3 +12167,43 @@ OS.indent(Depth + 2) << "--> " << *II->second.second << "\n"; } } + +// Match the mathematical pattern A - (A / B) * B, where A and B can be +// arbitrary expressions. +// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is +// 4, A / B becomes X / 8). +bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, + const SCEV *&RHS) { + const auto *Add = dyn_cast(Expr); + if (Add == nullptr || Add->getNumOperands() != 2) + return false; + + const SCEV *A = Add->getOperand(1); + const auto *Mul = dyn_cast(Add->getOperand(0)); + + if (Mul == nullptr) + return false; + + const auto MatchURemWithDivisor = [&](const SCEV *B) { + // (SomeExpr + (-(SomeExpr / B) * B)). + if (Expr == getURemExpr(A, B)) { + LHS = A; + RHS = B; + return true; + } + return false; + }; + + // (SomeExpr + (-1 * (SomeExpr / B) * B)). + if (Mul->getNumOperands() == 3 && isa(Mul->getOperand(0))) + return MatchURemWithDivisor(Mul->getOperand(1)) || + MatchURemWithDivisor(Mul->getOperand(2)); + + // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)). + if (Mul->getNumOperands() == 2) + return MatchURemWithDivisor(Mul->getOperand(1)) || + MatchURemWithDivisor(Mul->getOperand(0)) || + MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) || + MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); + return false; +} Index: llvm/trunk/test/Analysis/ScalarEvolution/zext-divrem.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/zext-divrem.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/zext-divrem.ll @@ -0,0 +1,42 @@ +; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s + +define i64 @test1(i32 %a, i32 %b) { +; CHECK-LABEL: @test1 + %div = udiv i32 %a, %b + %zext = zext i32 %div to i64 +; CHECK: %zext +; CHECK-NEXT: --> ((zext i32 %a to i64) /u (zext i32 %b to i64)) + ret i64 %zext +} + +define i64 @test2(i32 %a, i32 %b) { +; CHECK-LABEL: @test2 + %rem = urem i32 %a, %b + %zext = zext i32 %rem to i64 +; CHECK: %zext +; CHECK-NEXT: --> ((zext i32 %a to i64) + (-1 * (zext i32 %b to i64) * ((zext i32 %a to i64) /u (zext i32 %b to i64)))) + ret i64 %zext +} + +define i64 @test3(i32 %a, i32 %b) { +; CHECK-LABEL: @test3 + %div = udiv i32 %a, %b + %mul = mul i32 %div, %b + %sub = sub i32 %a, %mul + %zext = zext i32 %sub to i64 +; CHECK: %zext +; CHECK-NEXT: --> ((zext i32 %a to i64) + (-1 * (zext i32 %b to i64) * ((zext i32 %a to i64) /u (zext i32 %b to i64)))) + ret i64 %zext +} + +define i64 @test4(i32 %t) { +; CHECK-LABEL: @test4 + %a = udiv i32 %t, 2 + %div = udiv i32 %t, 112 + %mul = mul i32 %div, 56 + %sub = sub i32 %a, %mul + %zext = zext i32 %sub to i64 +; CHECK: %zext +; CHECK-NEXT: --> ((-56 * ((zext i32 %t to i64) /u 112)) + ((zext i32 %t to i64) /u 2)) + ret i64 %zext +} Index: polly/trunk/test/Isl/CodeGen/param_div_div_div_2.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/param_div_div_div_2.ll +++ polly/trunk/test/Isl/CodeGen/param_div_div_div_2.ll @@ -5,23 +5,27 @@ ; their domain. ; ; CHECK: Invalid Context: -; CHECK-NEXT: [p_0] -> { : p_0 < 0 } -; CHECK-NEXT: p0: ((%a /u %b) /u (%c /u %d)) +; CHECK-NEXT: [p_0] -> { : false } +; CHECK-NEXT: p0: (((zext i32 %a to i64) /u (zext i32 %b to i64)) /u ((zext i32 %c to i64) /u (zext i32 %d to i64))) ; ; void f(unsigned *A, unsigned a, unsigned b, unsigned c, unsigned d) { ; for (unsigned i; i < 100; i++) ; A[i] += A[(a / b) / (c / d)]; ; } ; -; IR: %[[R0:[.a-zA-Z0-9]*]] = icmp ugt i32 %b, 1 -; IR-NEXT: %[[R1:[.a-zA-Z0-9]*]] = select i1 %[[R0]], i32 %b, i32 1 -; IR-NEXT: %[[R2:[.a-zA-Z0-9]*]] = udiv i32 %a, %[[R1]] -; IR-NEXT: %[[R5:[.a-zA-Z0-9]*]] = icmp ugt i32 %d, 1 -; IR-NEXT: %[[R6:[.a-zA-Z0-9]*]] = select i1 %[[R5]], i32 %d, i32 1 -; IR-NEXT: %[[R7:[.a-zA-Z0-9]*]] = udiv i32 %c, %[[R6]] -; IR-NEXT: %[[R3:[.a-zA-Z0-9]*]] = icmp ugt i32 %[[R7]], 1 -; IR-NEXT: %[[R4:[.a-zA-Z0-9]*]] = select i1 %[[R3]], i32 %[[R7]], i32 1 -; IR-NEXT: %[[R8:[.a-zA-Z0-9]*]] = udiv i32 %[[R2]], %[[R4]] +; IR: %[[A:[.a-zA-Z0-9]*]] = zext i32 %a to i64 +; IR-NEXT: %[[B:[.a-zA-Z0-9]*]] = zext i32 %b to i64 +; IR-NEXT: %[[R0:[.a-zA-Z0-9]*]] = icmp ugt i64 %[[B]], 1 +; IR-NEXT: %[[R1:[.a-zA-Z0-9]*]] = select i1 %[[R0]], i64 %[[B]], i64 1 +; IR-NEXT: %[[R2:[.a-zA-Z0-9]*]] = udiv i64 %[[A]], %[[R1]] +; IR-NEXT: %[[C:[.a-zA-Z0-9]*]] = zext i32 %c to i64 +; IR-NEXT: %[[D:[.a-zA-Z0-9]*]] = zext i32 %d to i64 +; IR-NEXT: %[[R5:[.a-zA-Z0-9]*]] = icmp ugt i64 %[[D]], 1 +; IR-NEXT: %[[R6:[.a-zA-Z0-9]*]] = select i1 %[[R5]], i64 %[[D]], i64 1 +; IR-NEXT: %[[R7:[.a-zA-Z0-9]*]] = udiv i64 %[[C]], %[[R6]] +; IR-NEXT: %[[R3:[.a-zA-Z0-9]*]] = icmp ugt i64 %[[R7]], 1 +; IR-NEXT: %[[R4:[.a-zA-Z0-9]*]] = select i1 %[[R3]], i64 %[[R7]], i64 1 +; IR-NEXT: %[[R8:[.a-zA-Z0-9]*]] = udiv i64 %[[R2]], %[[R4]] ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"