Index: lib/Transforms/InstCombine/InstCombineShifts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineShifts.cpp +++ lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -622,11 +622,8 @@ return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask)); } - // Be careful about hiding shl instructions behind bit masks. They are used - // to represent multiplies by a constant, and it is important that simple - // arithmetic expressions are still recognizable by scalar evolution. - // The inexact versions are deferred to DAGCombine, so we don't hide shl - // behind a bit mask. + // FIXME: we do not yet transform non-exact lshr's. The backend (DAGCombine) + // needs a few fixes for the rotate pattern recognition first. const APInt *ShOp1; if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1))))) { unsigned ShrAmt = ShOp1->getZExtValue(); Index: test/Analysis/ScalarEvolution/extract-highbits-sameconstmask.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/extract-highbits-sameconstmask.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s + +; The obvious case. +define i32 @div(i32 %val) nounwind { +; CHECK-LABEL: 'div' +; CHECK-NEXT: Classifying expressions for: @div +; CHECK-NEXT: %tmp1 = udiv i32 %val, 16 +; CHECK-NEXT: --> (%val /u 16) U: [0,268435456) S: [0,268435456) +; CHECK-NEXT: %tmp2 = mul i32 %tmp1, 16 +; CHECK-NEXT: --> (16 * (%val /u 16)) U: [0,-15) S: [0,-15) +; CHECK-NEXT: Determining loop execution counts for: @div +; + %tmp1 = udiv i32 %val, 16 + %tmp2 = mul i32 %tmp1, 16 + ret i32 %tmp2 +} + +define i32 @sdiv(i32 %val) nounwind { +; CHECK-LABEL: 'sdiv' +; CHECK-NEXT: Classifying expressions for: @sdiv +; CHECK-NEXT: %tmp1 = sdiv i32 %val, 16 +; CHECK-NEXT: --> %tmp1 U: full-set S: [-134217728,134217728) +; CHECK-NEXT: %tmp2 = mul i32 %tmp1, 16 +; CHECK-NEXT: --> (16 * %tmp1) U: [0,-15) S: [-2147483648,2147483633) +; CHECK-NEXT: Determining loop execution counts for: @sdiv +; + %tmp1 = sdiv i32 %val, 16 + %tmp2 = mul i32 %tmp1, 16 + ret i32 %tmp2 +} + +; Or, it could be a number of equivalent patterns with mask: +; b) x & (-1 << nbits) +; d) x >> nbits << nbits + +define i32 @mask_b(i32 %val) nounwind { +; CHECK-LABEL: 'mask_b' +; CHECK-NEXT: Classifying expressions for: @mask_b +; CHECK-NEXT: %masked = and i32 %val, -16 +; CHECK-NEXT: --> (16 * (%val /u 16)) U: [0,-15) S: [0,-15) +; CHECK-NEXT: Determining loop execution counts for: @mask_b +; + %masked = and i32 %val, -16 + ret i32 %masked +} + +define i32 @mask_d(i32 %val) nounwind { +; CHECK-LABEL: 'mask_d' +; CHECK-NEXT: Classifying expressions for: @mask_d +; CHECK-NEXT: %lowbitscleared = lshr i32 %val, 4 +; CHECK-NEXT: --> (%val /u 16) U: [0,268435456) S: [0,268435456) +; CHECK-NEXT: %masked = shl i32 %lowbitscleared, 4 +; CHECK-NEXT: --> (16 * (%val /u 16)) U: [0,-15) S: [0,-15) +; CHECK-NEXT: Determining loop execution counts for: @mask_d +; + %lowbitscleared = lshr i32 %val, 4 + %masked = shl i32 %lowbitscleared, 4 + ret i32 %masked +} Index: test/Analysis/ScalarEvolution/extract-highbits-variablemask.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/extract-highbits-variablemask.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s + +; These testcases aren't *identical* but they have the same/similar meaning. + +; The obvious case. +define i32 @div(i32 %val, i32 %num) nounwind { +; CHECK-LABEL: 'div' +; CHECK-NEXT: Classifying expressions for: @div +; CHECK-NEXT: %tmp1 = udiv i32 %val, %num +; CHECK-NEXT: --> (%val /u %num) U: full-set S: full-set +; CHECK-NEXT: %tmp2 = mul i32 %tmp1, %num +; CHECK-NEXT: --> ((%val /u %num) * %num) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @div +; + %tmp1 = udiv i32 %val, %num + %tmp2 = mul i32 %tmp1, %num + ret i32 %tmp2 +} + +define i32 @sdiv(i32 %val, i32 %num) nounwind { +; CHECK-LABEL: 'sdiv' +; CHECK-NEXT: Classifying expressions for: @sdiv +; CHECK-NEXT: %tmp1 = sdiv i32 %val, %num +; CHECK-NEXT: --> %tmp1 U: full-set S: full-set +; CHECK-NEXT: %tmp2 = mul i32 %tmp1, %num +; CHECK-NEXT: --> (%num * %tmp1) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @sdiv +; + %tmp1 = sdiv i32 %val, %num + %tmp2 = mul i32 %tmp1, %num + ret i32 %tmp2 +} + +; Or, it could be a number of equivalent patterns with mask: +; b) x & (-1 << nbits) +; d) x >> (32 - y) << (32 - y) + +define i32 @mask_b(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: 'mask_b' +; CHECK-NEXT: Classifying expressions for: @mask_b +; CHECK-NEXT: %mask = shl i32 -1, %numlowbits +; CHECK-NEXT: --> %mask U: full-set S: full-set +; CHECK-NEXT: %masked = and i32 %mask, %val +; CHECK-NEXT: --> %masked U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @mask_b +; + %mask = shl i32 -1, %numlowbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @mask_d(i32 %val, i32 %lowbits) nounwind { +; CHECK-LABEL: 'mask_d' +; CHECK-NEXT: Classifying expressions for: @mask_d +; CHECK-NEXT: %numlowbits = sub i32 32, %lowbits +; CHECK-NEXT: --> (32 + (-1 * %lowbits)) U: full-set S: full-set +; CHECK-NEXT: %lowbitscleared = lshr i32 %val, %numlowbits +; CHECK-NEXT: --> %lowbitscleared U: full-set S: full-set +; CHECK-NEXT: %masked = shl i32 %lowbitscleared, %numlowbits +; CHECK-NEXT: --> %masked U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @mask_d +; + %numlowbits = sub i32 32, %lowbits + %lowbitscleared = lshr i32 %val, %numlowbits + %masked = shl i32 %lowbitscleared, %numlowbits + ret i32 %masked +} Index: test/Analysis/ScalarEvolution/extract-lowbits-sameconstmask.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/extract-lowbits-sameconstmask.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s + +; The obvious case. +define i32 @mul(i32 %val) nounwind { +; CHECK-LABEL: 'mul' +; CHECK-NEXT: Classifying expressions for: @mul +; CHECK-NEXT: %tmp1 = mul i32 %val, 16 +; CHECK-NEXT: --> (16 * %val) U: [0,-15) S: [-2147483648,2147483633) +; CHECK-NEXT: %tmp2 = udiv i32 %tmp1, 16 +; CHECK-NEXT: --> ((16 * %val) /u 16) U: [0,268435456) S: [0,268435456) +; CHECK-NEXT: Determining loop execution counts for: @mul +; + %tmp1 = mul i32 %val, 16 + %tmp2 = udiv i32 %tmp1, 16 + ret i32 %tmp2 +} + +; Or, it could be any number of equivalent patterns with mask: +; a) x & (1 << nbits) - 1 +; b) x & ~(-1 << nbits) +; c) x & (-1 >> (32 - y)) +; d) x << (32 - y) >> (32 - y) + +define i32 @mask_abc(i32 %val) nounwind { +; CHECK-LABEL: 'mask_abc' +; CHECK-NEXT: Classifying expressions for: @mask_abc +; CHECK-NEXT: %masked = and i32 %val, 15 +; CHECK-NEXT: --> (zext i4 (trunc i32 %val to i4) to i32) U: [0,16) S: [0,16) +; CHECK-NEXT: Determining loop execution counts for: @mask_abc +; + %masked = and i32 %val, 15 + ret i32 %masked +} + +define i32 @mask_d(i32 %val) nounwind { +; CHECK-LABEL: 'mask_d' +; CHECK-NEXT: Classifying expressions for: @mask_d +; CHECK-NEXT: %highbitscleared = shl i32 %val, 4 +; CHECK-NEXT: --> (16 * %val) U: [0,-15) S: [-2147483648,2147483633) +; CHECK-NEXT: %masked = lshr i32 %highbitscleared, 4 +; CHECK-NEXT: --> ((16 * %val) /u 16) U: [0,268435456) S: [0,268435456) +; CHECK-NEXT: Determining loop execution counts for: @mask_d +; + %highbitscleared = shl i32 %val, 4 + %masked = lshr i32 %highbitscleared, 4 + ret i32 %masked +} Index: test/Analysis/ScalarEvolution/extract-lowbits-variablemask.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/extract-lowbits-variablemask.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s + +; These testcases aren't *identical* but they have the same/similar meaning. + +; The obvious case. +define i32 @mul(i32 %val, i32 %num) nounwind { +; CHECK-LABEL: 'mul' +; CHECK-NEXT: Classifying expressions for: @mul +; CHECK-NEXT: %tmp1 = mul i32 %val, %num +; CHECK-NEXT: --> (%val * %num) U: full-set S: full-set +; CHECK-NEXT: %tmp2 = udiv i32 %tmp1, %num +; CHECK-NEXT: --> ((%val * %num) /u %num) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @mul +; + %tmp1 = mul i32 %val, %num + %tmp2 = udiv i32 %tmp1, %num + ret i32 %tmp2 +} + +; Or, it could be any number of equivalent patterns with mask: +; a) x & (1 << nbits) - 1 +; b) x & ~(-1 << nbits) +; c) x & (-1 >> (32 - y)) +; d) x << (32 - y) >> (32 - y) + +define i32 @mask_a(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: 'mask_a' +; CHECK-NEXT: Classifying expressions for: @mask_a +; CHECK-NEXT: %onebit = shl i32 1, %numlowbits +; CHECK-NEXT: --> %onebit U: full-set S: full-set +; CHECK-NEXT: %mask = add nsw i32 %onebit, -1 +; CHECK-NEXT: --> (-1 + %onebit) U: full-set S: full-set +; CHECK-NEXT: %masked = and i32 %mask, %val +; CHECK-NEXT: --> %masked U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @mask_a +; + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @mask_b(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: 'mask_b' +; CHECK-NEXT: Classifying expressions for: @mask_b +; CHECK-NEXT: %notmask = shl i32 -1, %numlowbits +; CHECK-NEXT: --> %notmask U: full-set S: full-set +; CHECK-NEXT: %mask = xor i32 %notmask, -1 +; CHECK-NEXT: --> (-1 + (-1 * %notmask)) U: full-set S: full-set +; CHECK-NEXT: %masked = and i32 %mask, %val +; CHECK-NEXT: --> %masked U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @mask_b +; + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @mask_c(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: 'mask_c' +; CHECK-NEXT: Classifying expressions for: @mask_c +; CHECK-NEXT: %numhighbits = sub i32 32, %numlowbits +; CHECK-NEXT: --> (32 + (-1 * %numlowbits)) U: full-set S: full-set +; CHECK-NEXT: %mask = lshr i32 -1, %numhighbits +; CHECK-NEXT: --> %mask U: full-set S: full-set +; CHECK-NEXT: %masked = and i32 %mask, %val +; CHECK-NEXT: --> %masked U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @mask_c +; + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @mask_d(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: 'mask_d' +; CHECK-NEXT: Classifying expressions for: @mask_d +; CHECK-NEXT: %numhighbits = sub i32 32, %numlowbits +; CHECK-NEXT: --> (32 + (-1 * %numlowbits)) U: full-set S: full-set +; CHECK-NEXT: %highbitscleared = shl i32 %val, %numhighbits +; CHECK-NEXT: --> %highbitscleared U: full-set S: full-set +; CHECK-NEXT: %masked = lshr i32 %highbitscleared, %numhighbits +; CHECK-NEXT: --> %masked U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @mask_d +; + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} Index: test/Analysis/ScalarEvolution/lshr-shl-differentconstmask.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/lshr-shl-differentconstmask.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s + +; The obvious case. +define i32 @udiv_biggerLshr(i32 %val) nounwind { +; CHECK-LABEL: 'udiv_biggerLshr' +; CHECK-NEXT: Classifying expressions for: @udiv_biggerLshr +; CHECK-NEXT: %tmp1 = udiv i32 %val, 64 +; CHECK-NEXT: --> (%val /u 64) U: [0,67108864) S: [0,67108864) +; CHECK-NEXT: %tmp2 = mul i32 %tmp1, 16 +; CHECK-NEXT: --> (16 * (%val /u 64)) U: [0,1073741809) S: [0,1073741809) +; CHECK-NEXT: Determining loop execution counts for: @udiv_biggerLshr +; + %tmp1 = udiv i32 %val, 64 + %tmp2 = mul i32 %tmp1, 16 + ret i32 %tmp2 +} + +define i32 @udiv_biggerShl(i32 %val) nounwind { +; CHECK-LABEL: 'udiv_biggerShl' +; CHECK-NEXT: Classifying expressions for: @udiv_biggerShl +; CHECK-NEXT: %tmp1 = udiv i32 %val, 16 +; CHECK-NEXT: --> (%val /u 16) U: [0,268435456) S: [0,268435456) +; CHECK-NEXT: %tmp2 = mul i32 %tmp1, 64 +; CHECK-NEXT: --> (64 * (%val /u 16)) U: [0,-63) S: [-2147483648,2147483585) +; CHECK-NEXT: Determining loop execution counts for: @udiv_biggerShl +; + %tmp1 = udiv i32 %val, 16 + %tmp2 = mul i32 %tmp1, 64 + ret i32 %tmp2 +} + +; Or, it could have been transformed to shifts + +define i32 @shifty_biggerLshr(i32 %val) { +; CHECK-LABEL: 'shifty_biggerLshr' +; CHECK-NEXT: Classifying expressions for: @shifty_biggerLshr +; CHECK-NEXT: %tmp1 = lshr i32 %val, 6 +; CHECK-NEXT: --> (%val /u 64) U: [0,67108864) S: [0,67108864) +; CHECK-NEXT: %tmp2 = shl i32 %tmp1, 4 +; CHECK-NEXT: --> (16 * (%val /u 64)) U: [0,1073741809) S: [0,1073741809) +; CHECK-NEXT: Determining loop execution counts for: @shifty_biggerLshr +; + %tmp1 = lshr i32 %val, 6 + %tmp2 = shl i32 %tmp1, 4 + ret i32 %tmp2 +} + +define i32 @shifty_biggerLshr_lshrexact(i32 %val) { +; CHECK-LABEL: 'shifty_biggerLshr_lshrexact' +; CHECK-NEXT: Classifying expressions for: @shifty_biggerLshr_lshrexact +; CHECK-NEXT: %tmp1 = lshr exact i32 %val, 6 +; CHECK-NEXT: --> (%val /u 64) U: [0,67108864) S: [0,67108864) +; CHECK-NEXT: %tmp2 = shl i32 %tmp1, 4 +; CHECK-NEXT: --> (16 * (%val /u 64)) U: [0,1073741809) S: [0,1073741809) +; CHECK-NEXT: Determining loop execution counts for: @shifty_biggerLshr_lshrexact +; + %tmp1 = lshr exact i32 %val, 6 + %tmp2 = shl i32 %tmp1, 4 + ret i32 %tmp2 +} + +define i32 @shifty_biggerShr(i32 %val) { +; CHECK-LABEL: 'shifty_biggerShr' +; CHECK-NEXT: Classifying expressions for: @shifty_biggerShr +; CHECK-NEXT: %tmp1 = lshr i32 %val, 4 +; CHECK-NEXT: --> (%val /u 16) U: [0,268435456) S: [0,268435456) +; CHECK-NEXT: %tmp2 = shl i32 %tmp1, 6 +; CHECK-NEXT: --> (64 * (%val /u 16)) U: [0,-63) S: [-2147483648,2147483585) +; CHECK-NEXT: Determining loop execution counts for: @shifty_biggerShr +; + %tmp1 = lshr i32 %val, 4 + %tmp2 = shl i32 %tmp1, 6 + ret i32 %tmp2 +} + +define i32 @shifty_biggerShr_lshrexact(i32 %val) { +; CHECK-LABEL: 'shifty_biggerShr_lshrexact' +; CHECK-NEXT: Classifying expressions for: @shifty_biggerShr_lshrexact +; CHECK-NEXT: %tmp1 = lshr exact i32 %val, 4 +; CHECK-NEXT: --> (%val /u 16) U: [0,268435456) S: [0,268435456) +; CHECK-NEXT: %tmp2 = shl i32 %tmp1, 6 +; CHECK-NEXT: --> (64 * (%val /u 16)) U: [0,-63) S: [-2147483648,2147483585) +; CHECK-NEXT: Determining loop execution counts for: @shifty_biggerShr_lshrexact +; + %tmp1 = lshr exact i32 %val, 4 + %tmp2 = shl i32 %tmp1, 6 + ret i32 %tmp2 +} + +; Or, further folded into mask variant. + +define i32 @masky_biggerLshr(i32 %val) { +; CHECK-LABEL: 'masky_biggerLshr' +; CHECK-NEXT: Classifying expressions for: @masky_biggerLshr +; CHECK-NEXT: %tmp1 = lshr i32 %val, 2 +; CHECK-NEXT: --> (%val /u 4) U: [0,1073741824) S: [0,1073741824) +; CHECK-NEXT: %tmp2 = and i32 %tmp1, -16 +; CHECK-NEXT: --> (16 * (%val /u 64)) U: [0,1073741809) S: [0,1073741809) +; CHECK-NEXT: Determining loop execution counts for: @masky_biggerLshr +; + %tmp1 = lshr i32 %val, 2 + %tmp2 = and i32 %tmp1, -16 + ret i32 %tmp2 +} + +define i32 @masky_biggerLshr_lshrexact(i32 %val) { +; CHECK-LABEL: 'masky_biggerLshr_lshrexact' +; CHECK-NEXT: Classifying expressions for: @masky_biggerLshr_lshrexact +; CHECK-NEXT: %tmp1 = lshr exact i32 %val, 2 +; CHECK-NEXT: --> (%val /u 4) U: [0,1073741824) S: [0,1073741824) +; CHECK-NEXT: Determining loop execution counts for: @masky_biggerLshr_lshrexact +; + %tmp1 = lshr exact i32 %val, 2 + ret i32 %tmp1 +} + +define i32 @masky_biggerShr(i32 %val) { +; CHECK-LABEL: 'masky_biggerShr' +; CHECK-NEXT: Classifying expressions for: @masky_biggerShr +; CHECK-NEXT: %tmp1 = shl i32 %val, 2 +; CHECK-NEXT: --> (4 * %val) U: [0,-3) S: [-2147483648,2147483645) +; CHECK-NEXT: %tmp2 = and i32 %tmp1, -64 +; CHECK-NEXT: --> (64 * (zext i26 (trunc i32 (%val /u 16) to i26) to i32)) U: [0,-63) S: [0,-63) +; CHECK-NEXT: Determining loop execution counts for: @masky_biggerShr +; + %tmp1 = shl i32 %val, 2 + %tmp2 = and i32 %tmp1, -64 + ret i32 %tmp2 +} + +define i32 @masky_biggerShr_lshrexact(i32 %val) { +; CHECK-LABEL: 'masky_biggerShr_lshrexact' +; CHECK-NEXT: Classifying expressions for: @masky_biggerShr_lshrexact +; CHECK-NEXT: %tmp1 = shl i32 %val, 2 +; CHECK-NEXT: --> (4 * %val) U: [0,-3) S: [-2147483648,2147483645) +; CHECK-NEXT: Determining loop execution counts for: @masky_biggerShr_lshrexact +; + %tmp1 = shl i32 %val, 2 + ret i32 %tmp1 +} Index: test/Analysis/ScalarEvolution/shl-lshr-differentconstmask.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/shl-lshr-differentconstmask.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s + +; The obvious case. +define i32 @mul_biggerShl(i32 %val) nounwind { +; CHECK-LABEL: 'mul_biggerShl' +; CHECK-NEXT: Classifying expressions for: @mul_biggerShl +; CHECK-NEXT: %tmp1 = mul i32 %val, 64 +; CHECK-NEXT: --> (64 * %val) U: [0,-63) S: [-2147483648,2147483585) +; CHECK-NEXT: %tmp2 = udiv i32 %tmp1, 16 +; CHECK-NEXT: --> ((64 * %val) /u 16) U: [0,268435453) S: [0,268435456) +; CHECK-NEXT: Determining loop execution counts for: @mul_biggerShl +; + %tmp1 = mul i32 %val, 64 + %tmp2 = udiv i32 %tmp1, 16 + ret i32 %tmp2 +} + +define i32 @mul_biggerLshl(i32 %val) nounwind { +; CHECK-LABEL: 'mul_biggerLshl' +; CHECK-NEXT: Classifying expressions for: @mul_biggerLshl +; CHECK-NEXT: %tmp1 = mul i32 %val, 16 +; CHECK-NEXT: --> (16 * %val) U: [0,-15) S: [-2147483648,2147483633) +; CHECK-NEXT: %tmp2 = udiv i32 %tmp1, 64 +; CHECK-NEXT: --> ((16 * %val) /u 64) U: [0,67108864) S: [0,67108864) +; CHECK-NEXT: Determining loop execution counts for: @mul_biggerLshl +; + %tmp1 = mul i32 %val, 16 + %tmp2 = udiv i32 %tmp1, 64 + ret i32 %tmp2 +} + +; Or, it could have been transformed to shifts + +define i32 @shifty_biggerShl(i32 %val) { +; CHECK-LABEL: 'shifty_biggerShl' +; CHECK-NEXT: Classifying expressions for: @shifty_biggerShl +; CHECK-NEXT: %tmp1 = shl i32 %val, 6 +; CHECK-NEXT: --> (64 * %val) U: [0,-63) S: [-2147483648,2147483585) +; CHECK-NEXT: %tmp2 = lshr i32 %tmp1, 4 +; CHECK-NEXT: --> ((64 * %val) /u 16) U: [0,268435453) S: [0,268435456) +; CHECK-NEXT: Determining loop execution counts for: @shifty_biggerShl +; + %tmp1 = shl i32 %val, 6 + %tmp2 = lshr i32 %tmp1, 4 + ret i32 %tmp2 +} + +define i32 @shifty_biggerShl_shlnuw(i32 %val) { +; CHECK-LABEL: 'shifty_biggerShl_shlnuw' +; CHECK-NEXT: Classifying expressions for: @shifty_biggerShl_shlnuw +; CHECK-NEXT: %tmp1 = shl nuw i32 %val, 6 +; CHECK-NEXT: --> (64 * %val) U: [0,-63) S: [-2147483648,2147483585) +; CHECK-NEXT: %tmp2 = lshr i32 %tmp1, 4 +; CHECK-NEXT: --> ((64 * %val) /u 16) U: [0,268435453) S: [0,268435456) +; CHECK-NEXT: Determining loop execution counts for: @shifty_biggerShl_shlnuw +; + %tmp1 = shl nuw i32 %val, 6 + %tmp2 = lshr i32 %tmp1, 4 + ret i32 %tmp2 +} + +define i32 @shifty_biggerLshr(i32 %val) { +; CHECK-LABEL: 'shifty_biggerLshr' +; CHECK-NEXT: Classifying expressions for: @shifty_biggerLshr +; CHECK-NEXT: %tmp1 = shl i32 %val, 4 +; CHECK-NEXT: --> (16 * %val) U: [0,-15) S: [-2147483648,2147483633) +; CHECK-NEXT: %tmp2 = lshr i32 %tmp1, 6 +; CHECK-NEXT: --> ((16 * %val) /u 64) U: [0,67108864) S: [0,67108864) +; CHECK-NEXT: Determining loop execution counts for: @shifty_biggerLshr +; + %tmp1 = shl i32 %val, 4 + %tmp2 = lshr i32 %tmp1, 6 + ret i32 %tmp2 +} + +define i32 @shifty_biggerLshr_shlnuw(i32 %val) { +; CHECK-LABEL: 'shifty_biggerLshr_shlnuw' +; CHECK-NEXT: Classifying expressions for: @shifty_biggerLshr_shlnuw +; CHECK-NEXT: %tmp1 = shl nuw i32 %val, 4 +; CHECK-NEXT: --> (16 * %val) U: [0,-15) S: [-2147483648,2147483633) +; CHECK-NEXT: %tmp2 = lshr i32 %tmp1, 6 +; CHECK-NEXT: --> ((16 * %val) /u 64) U: [0,67108864) S: [0,67108864) +; CHECK-NEXT: Determining loop execution counts for: @shifty_biggerLshr_shlnuw +; + %tmp1 = shl nuw i32 %val, 4 + %tmp2 = lshr i32 %tmp1, 6 + ret i32 %tmp2 +} + +; Or, further folded into mask variant. + +define i32 @masky_biggerShl(i32 %val) { +; CHECK-LABEL: 'masky_biggerShl' +; CHECK-NEXT: Classifying expressions for: @masky_biggerShl +; CHECK-NEXT: %tmp1 = shl i32 %val, 2 +; CHECK-NEXT: --> (4 * %val) U: [0,-3) S: [-2147483648,2147483645) +; CHECK-NEXT: %tmp2 = and i32 %tmp1, 268435452 +; CHECK-NEXT: --> (4 * (zext i26 (trunc i32 %val to i26) to i32)) U: [0,268435453) S: [0,268435453) +; CHECK-NEXT: Determining loop execution counts for: @masky_biggerShl +; + %tmp1 = shl i32 %val, 2 + %tmp2 = and i32 %tmp1, 268435452 + ret i32 %tmp2 +} + +define i32 @masky_biggerShl_shlnuw(i32 %val) { +; CHECK-LABEL: 'masky_biggerShl_shlnuw' +; CHECK-NEXT: Classifying expressions for: @masky_biggerShl_shlnuw +; CHECK-NEXT: %tmp1 = shl nuw i32 %val, 2 +; CHECK-NEXT: --> (4 * %val) U: [0,-3) S: [-2147483648,2147483645) +; CHECK-NEXT: Determining loop execution counts for: @masky_biggerShl_shlnuw +; + %tmp1 = shl nuw i32 %val, 2 + ret i32 %tmp1 +} + +define i32 @masky_biggerLshr(i32 %val) { +; CHECK-LABEL: 'masky_biggerLshr' +; CHECK-NEXT: Classifying expressions for: @masky_biggerLshr +; CHECK-NEXT: %tmp1 = lshr i32 %val, 2 +; CHECK-NEXT: --> (%val /u 4) U: [0,1073741824) S: [0,1073741824) +; CHECK-NEXT: %tmp2 = and i32 %tmp1, 67108863 +; CHECK-NEXT: --> (zext i26 (trunc i32 (%val /u 4) to i26) to i32) U: [0,67108864) S: [0,67108864) +; CHECK-NEXT: Determining loop execution counts for: @masky_biggerLshr +; + %tmp1 = lshr i32 %val, 2 + %tmp2 = and i32 %tmp1, 67108863 + ret i32 %tmp2 +} + +define i32 @masky_biggerLshr_shlnuw(i32 %val) { +; CHECK-LABEL: 'masky_biggerLshr_shlnuw' +; CHECK-NEXT: Classifying expressions for: @masky_biggerLshr_shlnuw +; CHECK-NEXT: %tmp1 = lshr i32 %val, 2 +; CHECK-NEXT: --> (%val /u 4) U: [0,1073741824) S: [0,1073741824) +; CHECK-NEXT: Determining loop execution counts for: @masky_biggerLshr_shlnuw +; + %tmp1 = lshr i32 %val, 2 + ret i32 %tmp1 +}