diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1904,6 +1904,30 @@ return new ZExtInst(NewBO, Ty); } } + + Constant *C1, *C2; + const APInt *C3 = C; + Value *X; + if (C3->isPowerOf2() && + match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)), + m_ImmConstant(C2)))) && + match(C1, m_Power2())) { + Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1); + Constant *Log2C3 = ConstantInt::get(Ty, C3->countTrailingZeros()); + Constant *LshrC = ConstantExpr::getAdd(C2, Log2C3); + KnownBits KnownLShrc = computeKnownBits(LshrC, 0, nullptr); + if (KnownLShrc.getMaxValue().ult(Width)) { + // iff C1,C3 is pow2 and C2 + cttz(C3) < BitWidth: + // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0 + Constant *CmpC = ConstantExpr::getSub(LshrC, Log2C1); + Value *Cmp = Builder.CreateICmpEQ(X, CmpC); + return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3), + ConstantInt::getNullValue(Ty)); + } + // TODO: Symmetrical case + // iff C1,C3 is pow2 and Log2(C3) >= C2: + // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? 
C3 : 0 + } } if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))), diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -1778,9 +1778,8 @@ define i16 @shl_lshr_pow2_const_case1(i16 %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1( -; CHECK-NEXT: [[SHL:%.*]] = shl i16 4, [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr i16 [[SHL]], 6 -; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0 ; CHECK-NEXT: ret i16 [[R]] ; %shl = shl i16 4, %x @@ -1791,9 +1790,8 @@ define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_uniform_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> , [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], -; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> , %x @@ -1804,22 +1802,20 @@ define <3 x i16> @shl_lshr_pow2_const_case1_non_uniform_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_non_uniform_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> , [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], -; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; - %shl = shl <3 x i16> , %x - %lshr = lshr <3 x i16> %shl, - %r = and <3 x i16> %lshr, + %shl = shl <3 x i16> , %x + %lshr = lshr <3 x i16> %shl, + %r = and <3 x i16> %lshr, ret <3 x i16> %r } define <3 x i16> @shl_lshr_pow2_const_case1_undef1_vec(<3 x i16> %x) { ; CHECK-LABEL: 
@shl_lshr_pow2_const_case1_undef1_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> , [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], -; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> , %x @@ -1868,6 +1864,8 @@ ret i16 %r } +; TODO: this pattern can be transformed to icmp+select + define i16 @shl_lshr_pow2_not_const_case2(i16 %x) { ; CHECK-LABEL: @shl_lshr_pow2_not_const_case2( ; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]] diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll --- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll +++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll @@ -57,11 +57,9 @@ define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2( -; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4 -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[TMP3]] ; %shl = shl i32 2, %0 %and = and i32 %shl, 16 @@ -72,11 +70,9 @@ define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[SHL]], -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], -; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %shl = 
shl <2 x i32> , %0 %and = and <2 x i32> %shl, @@ -87,9 +83,8 @@ define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2( -; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4 -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[AND_LOBIT:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[AND_LOBIT]] ; %shl = shl i32 2, %0 @@ -101,9 +96,8 @@ define <2 x i32> @icmp_ne_and_pow2_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[SHL]], -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[AND_LOBIT:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] ; %shl = shl <2 x i32> , %0