Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -922,10 +922,24 @@ } } - // icmp ne A, B is equal to xor A, B when A and B only really have one bit. - // It is also profitable to transform icmp eq into not(xor(A, B)) because that - // may lead to additional simplifications. if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) { + // Test if a bit is clear/set using a shifted-one mask: + // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1 + // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1 + Value *X, *ShAmt; + if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) && + match(Cmp->getOperand(0), + m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) { + if (Cmp->getPredicate() == ICmpInst::ICMP_EQ) + X = Builder.CreateNot(X); + Value *Lshr = Builder.CreateLShr(X, ShAmt); + Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1)); + return replaceInstUsesWith(Zext, And1); + } + + // icmp ne A, B is equal to xor A, B when A and B only really have one bit. + // It is also profitable to transform icmp eq into not(xor(A, B)) because + // that may lead to additional simplifications. 
if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) { Value *LHS = Cmp->getOperand(0); Value *RHS = Cmp->getOperand(1); Index: llvm/test/Transforms/InstCombine/zext.ll =================================================================== --- llvm/test/Transforms/InstCombine/zext.ll +++ llvm/test/Transforms/InstCombine/zext.ll @@ -177,11 +177,9 @@ define i32 @masked_bit_set(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_set( -; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i32 [[SH1]], [[X:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 -; CHECK-NEXT: [[R:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %sh1 = shl i32 1, %y %and = and i32 %sh1, %x @@ -192,11 +190,10 @@ define <2 x i32> @masked_bit_clear(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @masked_bit_clear( -; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[Y:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[SH1]], [[X:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1> +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %sh1 = shl <2 x i32> <i32 1, i32 1>, %y %and = and <2 x i32> %sh1, %x @@ -208,11 +205,9 @@ define <2 x i32> @masked_bit_set_commute(<2 x i32> %px, <2 x i32> %y) { ; CHECK-LABEL: @masked_bit_set_commute( ; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> , [[PX:%.*]] -; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[Y:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X]], [[SH1]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[R]] +;
CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %x = srem <2 x i32> , %px ; thwart complexity-based canonicalization %sh1 = shl <2 x i32> <i32 1, i32 1>, %y @@ -225,11 +220,10 @@ define i32 @masked_bit_clear_commute(i32 %px, i32 %y) { ; CHECK-LABEL: @masked_bit_clear_commute( ; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[PX:%.*]] -; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[SH1]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: [[R:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 +; CHECK-NEXT: ret i32 [[TMP3]] ; %x = srem i32 42, %px ; thwart complexity-based canonicalization %sh1 = shl i32 1, %y @@ -243,10 +237,9 @@ ; CHECK-LABEL: @masked_bit_set_use1( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] ; CHECK-NEXT: call void @use32(i32 [[SH1]]) -; CHECK-NEXT: [[AND:%.*]] = and i32 [[SH1]], [[X:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 -; CHECK-NEXT: [[R:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %sh1 = shl i32 1, %y call void @use32(i32 %sh1) @@ -256,6 +249,8 @@ ret i32 %r } +; Negative test + define i32 @masked_bit_set_use2(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_set_use2( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] @@ -273,6 +268,8 @@ ret i32 %r } +; Negative test + define i32 @masked_bit_set_use3(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_set_use3( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] @@ -294,10 +291,10 @@ ; CHECK-LABEL: @masked_bit_clear_use1( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] ; CHECK-NEXT: call void @use32(i32 [[SH1]]) -; CHECK-NEXT:
[[AND:%.*]] = and i32 [[SH1]], [[X:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: [[R:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], [[Y]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 +; CHECK-NEXT: ret i32 [[TMP3]] ; %sh1 = shl i32 1, %y call void @use32(i32 %sh1) @@ -307,6 +304,8 @@ ret i32 %r } +; Negative test + define i32 @masked_bit_clear_use2(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_clear_use2( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] @@ -324,6 +323,8 @@ ret i32 %r } +; Negative test + define i32 @masked_bit_clear_use3(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_clear_use3( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] @@ -341,6 +342,8 @@ ret i32 %r } +; Negative test + define i32 @masked_bits_set(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bits_set( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 3, [[Y:%.*]] @@ -356,6 +359,8 @@ ret i32 %r } +; Negative test + define i32 @div_bit_set(i32 %x, i32 %y) { ; CHECK-LABEL: @div_bit_set( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] @@ -371,6 +376,8 @@ ret i32 %r } +; Negative test + define i32 @masked_bit_set_nonzero_cmp(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_set_nonzero_cmp( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]] @@ -386,6 +393,8 @@ ret i32 %r } +; Negative test + define i32 @masked_bit_wrong_pred(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_wrong_pred( ; CHECK-NEXT: [[SH1:%.*]] = shl i32 1, [[Y:%.*]]