Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -6306,8 +6306,48 @@ // also returns true if StepV is maximally negative (eg, INT_MIN), but that // case is not handled as this code is guarded by !CountDown. if (StepV.isPowerOf2() && - GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) - return getUDivExactExpr(Distance, Step); + GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) { + // Here we've constrained the equation to be of the form + // + // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0) + // + // where we're operating on a W bit wide integer domain and k is + // non-negative. The smallest unsigned solution for X is the trip count. + // + // (0) is equivalent to: + // + // 2^(N + k) * Distance' - 2^N * X = L * 2^W + // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N + // <=> 2^k * Distance' - X = L * 2^(W - N) + // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1) + // + // The smallest X satisfying (1) is unsigned remainder of dividing the LHS + // by 2^(W - N). + // + // <=> X = 2^k * Distance' URem 2^(W - N) ... (2) + // + // E.g. say we're solving + // + // 2 * Val = 2 * X (in i8) ... (3) + // + // then from (2), we get X = Val URem i8 128 (k = 0 in this case). + // + // Note: It is tempting to solve (3) by setting X = Val, but Val is not + // necessarily the smallest unsigned value of X that satisfies (3). + // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) + // is i8 1, not i8 -127 + + const auto *ModuloResult = getUDivExactExpr(Distance, Step); + + // Since SCEV does not have a URem node, we construct one using a truncate + // and a zero extend. + + unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); + auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); + auto *WideTy = Distance->getType(); + + return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + } } // If the condition controls loop exit (the loop exits only if the expression Index: llvm/trunk/test/Analysis/ScalarEvolution/pr24757.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/pr24757.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/pr24757.ll @@ -0,0 +1,35 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +; CHECK: Loop %bb1: backedge-taken count is (zext i7 (trunc i8 %a.promoted to i7) to i8) + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +@a = global i8 -127, align 1 +@b = common global i32 0, align 4 + +declare void @use(i32) + +define i32 @main() { +bb: + %a.promoted = load i8, i8* @a + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp = phi i8 [ %tmp2, %bb1 ], [ %a.promoted, %bb ] + %tmp2 = add i8 %tmp, -1 + %tmp3 = sext i8 %tmp to i32 + %tmp4 = xor i32 %tmp3, -1 + %tmp5 = sext i8 %tmp2 to i32 + %tmpf = sub nsw i32 %tmp4, %tmp5 + %tmp6 = trunc i32 %tmpf to i8 + %tmp7 = icmp eq i8 %tmp6, 0 + br i1 %tmp7, label %bb8, label %bb1 + +bb8: ; preds = %bb1 + store i8 %tmp2, i8* @a + store i32 %tmp4, i32* @b + %tmp9 = sext i8 %tmp2 to i32 + call void @use(i32 %tmp9) + ret i32 0 +}