Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -437,173 +437,6 @@
   return Optional<std::pair<unsigned, unsigned>>(std::make_pair(LeftType, RightType));
 }
 
-/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
-/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
-/// and the right hand side is of type BMask_Mixed. For example,
-/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
-static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
-    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
-    Value *A, Value *B, Value *C, Value *D, Value *E,
-    ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
-    llvm::InstCombiner::BuilderTy &Builder) {
-  // We are given the canonical form:
-  //   (icmp ne (A & B), 0) & (icmp eq (A & D), E).
-  // where D & E == E.
-  //
-  // If IsAnd is false, we get it in negated form:
-  //   (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
-  //   !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
-  //
-  // We currently handle the case of B, C, D, E are constant.
-  //
-  ConstantInt *BCst = dyn_cast<ConstantInt>(B);
-  if (!BCst)
-    return nullptr;
-  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
-  if (!CCst)
-    return nullptr;
-  ConstantInt *DCst = dyn_cast<ConstantInt>(D);
-  if (!DCst)
-    return nullptr;
-  ConstantInt *ECst = dyn_cast<ConstantInt>(E);
-  if (!ECst)
-    return nullptr;
-
-  ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
-
-  // Update E to the canonical form when D is a power of two and RHS is
-  // canonicalized as,
-  // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
-  // (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
-  if (PredR != NewCC)
-    ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
-
-  // If B or D is zero, skip because if LHS or RHS can be trivially folded by
-  // other folding rules and this pattern won't apply any more.
- if (BCst->getValue() == 0 || DCst->getValue() == 0) - return nullptr; - - // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't - // deduce anything from it. - // For example, - // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding. - if ((BCst->getValue() & DCst->getValue()) == 0) - return nullptr; - - // If the following two conditions are met: - // - // 1. mask B covers only a single bit that's not covered by mask D, that is, - // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of - // B and D has only one bit set) and, - // - // 2. RHS (and E) indicates that the rest of B's bits are zero (in other - // words, the intersection of B and D is zero), that is, ((B & D) & E) == 0 - // - // then that single bit in B must be one and thus the whole expression can be - // folded to - // (A & (B | D)) == (B & (B ^ D)) | E. - // - // For example, - // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9) - // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8) - if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) && - (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) { - APInt BorD = BCst->getValue() | DCst->getValue(); - APInt BandBxorDorE = (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) | - ECst->getValue(); - Value *NewMask = ConstantInt::get(BCst->getType(), BorD); - Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE); - Value *NewAnd = Builder.CreateAnd(A, NewMask); - return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue); - } - - auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) { - return (C1->getValue() & C2->getValue()) == C1->getValue(); - }; - auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) { - return (C1->getValue() & C2->getValue()) == C2->getValue(); - }; - - // In the following, we consider only the cases where B is a superset of D, B - // is a subset of D, or B == D 
because otherwise there's at least one bit - // covered by B but not D, in which case we can't deduce much from it, so - // no folding (aside from the single must-be-one bit case right above.) - // For example, - // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding. - if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst)) - return nullptr; - - // At this point, either B is a superset of D, B is a subset of D or B == D. - - // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict - // and the whole expression becomes false (or true if negated), otherwise, no - // folding. - // For example, - // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false. - // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding. - if (ECst->isZero()) { - if (IsSubSetOrEqual(BCst, DCst)) - return ConstantInt::get(LHS->getType(), !IsAnd); - return nullptr; - } - - // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B == - // D. If B is a superset of (or equal to) D, since E is not zero, LHS is - // subsumed by RHS (RHS implies LHS.) So the whole expression becomes - // RHS. For example, - // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). - // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). - if (IsSuperSetOrEqual(BCst, DCst)) - return RHS; - // Otherwise, B is a subset of D. If B and E have a common bit set, - // ie. (B & E) != 0, then LHS is subsumed by RHS. For example. - // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). - assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code"); - if ((BCst->getValue() & ECst->getValue()) != 0) - return RHS; - // Otherwise, LHS and RHS contradict and the whole expression becomes false - // (or true if negated.) For example, - // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false. - // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false. 
- return ConstantInt::get(LHS->getType(), !IsAnd); -} - -/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single -/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side -/// aren't of the common mask pattern type. -static Value *foldLogOpOfMaskedICmpsAsymmetric( - ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, - Value *A, Value *B, Value *C, Value *D, Value *E, - ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, - unsigned LHSMask, unsigned RHSMask, - llvm::InstCombiner::BuilderTy &Builder) { - assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && - "Expected equality predicates for masked type of icmps."); - // Handle Mask_NotAllZeros-BMask_Mixed cases. - // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or - // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E) - // which gets swapped to - // (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C). - if (!IsAnd) { - LHSMask = conjugateICmpMask(LHSMask); - RHSMask = conjugateICmpMask(RHSMask); - } - if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) { - if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( - LHS, RHS, IsAnd, A, B, C, D, E, - PredL, PredR, Builder)) { - return V; - } - } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) { - if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( - RHS, LHS, IsAnd, A, D, E, B, C, - PredR, PredL, Builder)) { - return V; - } - } - return nullptr; -} - /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y). static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, @@ -619,15 +452,6 @@ unsigned LHSMask = MaskPair->first; unsigned RHSMask = MaskPair->second; unsigned Mask = LHSMask & RHSMask; - if (Mask == 0) { - // Even if the two sides don't share a common pattern, check if folding can - // still happen. 
- if (Value *V = foldLogOpOfMaskedICmpsAsymmetric( - LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask, - Builder)) - return V; - return nullptr; - } // In full generality: // (icmp (A & B) Op C) | (icmp (A & D) Op E) @@ -711,41 +535,47 @@ return RHS; } - if (Mask & BMask_Mixed) { - // (icmp eq (A & B), C) & (icmp eq (A & D), E) - // We already know that B & C == C && D & E == E. - // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of - // C and E, which are shared by both the mask B and the mask D, don't - // contradict, then we can transform to - // -> (icmp eq (A & (B|D)), (C|E)) - // Currently, we only handle the case of B, C, D, and E being constant. - // We can't simply use C and E because we might actually handle - // (icmp ne (A & B), B) & (icmp eq (A & D), D) - // with B and D, having a single bit set. - ConstantInt *CCst = dyn_cast(C); - if (!CCst) - return nullptr; - ConstantInt *ECst = dyn_cast(E); - if (!ECst) - return nullptr; - if (PredL != NewCC) - CCst = cast(ConstantExpr::getXor(BCst, CCst)); - if (PredR != NewCC) - ECst = cast(ConstantExpr::getXor(DCst, ECst)); - - // If there is a conflict, we should actually return a false for the - // whole construct. - if (((BCst->getValue() & DCst->getValue()) & - (CCst->getValue() ^ ECst->getValue())).getBoolValue()) - return ConstantInt::get(LHS->getType(), !IsAnd); + // B, C, D, and E must be constants for this optimization + ConstantInt *CCst = dyn_cast(C); + if (!CCst) + return nullptr; + ConstantInt *ECst = dyn_cast(E); + if (!ECst) + return nullptr; - Value *NewOr1 = Builder.CreateOr(B, D); - Value *NewOr2 = ConstantExpr::getOr(CCst, ECst); - Value *NewAnd = Builder.CreateAnd(A, NewOr1); - return Builder.CreateICmp(NewCC, NewAnd, NewOr2); - } + // If B and D intersect, ie. (B & D) != 0, test if they contradict each other + // and if so simplify to false (or true if this is an Or). + // For example, + // (icmp (A & 14), 2) and (icmp (A & 3), 1) -> false. 
+ APInt Intersection = BCst->getValue() & DCst->getValue(); + if (Intersection != 0 && + ((Intersection & CCst->getValue()) != (Intersection & ECst->getValue()))) + return ConstantInt::get(LHS->getType(), !IsAnd); - return nullptr; + // Use an Xor mask to compare full range against zero + // (icmp(A & 4) == 4) && (icmp(A & 3) == 2) + // => (icmp((xor A, 6), 7) == 0) + // icmp(A & 4) == 4) || (icmp(A & 3) == 2) + // => (icmp((xor A, 1), 7) != 0) + APInt BAndD = BCst->getValue() & DCst->getValue(); + Value *X = ConstantExpr::getOr(BCst, DCst); + APInt ZI = APInt(BCst->getBitWidth(), 0), + Zero = APInt(BCst->getBitWidth(), 0), + BI = BCst->getValue(), CI = CCst->getValue(), + DI = DCst->getValue(), EI = ECst->getValue(); + bool LIsEq = (PredL == ICmpInst::ICMP_EQ); + bool RIsEq = (PredR == ICmpInst::ICMP_EQ); + + ZI |= LIsEq ? ~CI & BI : CI & BI; + ZI |= RIsEq ? ~EI & DI : EI & DI; + + if (!IsAnd) + ZI ^= BAndD; + + Value *NewXor = Builder.CreateXor(A, ConstantInt::get(A->getContext(), ZI)); + Value *NewAnd = Builder.CreateAnd(X, NewXor); + return Builder.CreateICmp(IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, + ConstantInt::get(A->getContext(), Zero), NewAnd); } /// Try to fold a signed range checked with lower bound 0 to an unsigned icmp. 
Index: test/Transforms/InstSimplify/AndOrXor.ll =================================================================== --- test/Transforms/InstSimplify/AndOrXor.ll +++ test/Transforms/InstSimplify/AndOrXor.ll @@ -64,7 +64,7 @@ define i64 @pow2(i32 %x) { ; CHECK-LABEL: @pow2( ; CHECK-NEXT: [[NEGX:%.*]] = sub i32 0, [[X:%.*]] -; CHECK-NEXT: [[X2:%.*]] = and i32 [[X]], [[NEGX]] +; CHECK-NEXT: [[X2:%.*]] = and i32 [[NEGX]], [[X]] ; CHECK-NEXT: [[E:%.*]] = zext i32 [[X2]] to i64 ; CHECK-NEXT: ret i64 [[E]] ; @@ -528,15 +528,12 @@ define i16 @and_of_different_cast_icmps(i8 %i) { ; CHECK-LABEL: @and_of_different_cast_icmps( ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i8 [[I:%.*]], 0 -; CHECK-NEXT: [[CONV0:%.*]] = zext i1 [[CMP0]] to i16 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[I]], 1 -; CHECK-NEXT: [[CONV1:%.*]] = sext i1 [[CMP1]] to i16 -; CHECK-NEXT: [[AND:%.*]] = and i16 [[CONV0]], [[CONV1]] +; CHECK-NEXT: [[AND:%.*]] = zext i1 [[CMP0]] to i16 ; CHECK-NEXT: ret i16 [[AND]] ; %cmp0 = icmp eq i8 %i, 0 %conv0 = zext i1 %cmp0 to i16 - %cmp1 = icmp eq i8 %i, 1 + %cmp1 = icmp ne i8 %i, 1 %conv1 = sext i1 %cmp1 to i16 %and = and i16 %conv0, %conv1 ret i16 %and @@ -545,10 +542,9 @@ define <2 x i3> @and_of_different_cast_icmps_vec(<2 x i8> %i, <2 x i16> %j) { ; CHECK-LABEL: @and_of_different_cast_icmps_vec( ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq <2 x i8> [[I:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV0:%.*]] = zext <2 x i1> [[CMP0]] to <2 x i3> ; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i16> [[J:%.*]], -; CHECK-NEXT: [[CONV1:%.*]] = zext <2 x i1> [[CMP1]] to <2 x i3> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i3> [[CONV0]], [[CONV1]] +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i1> [[CMP0]], [[CMP1]] +; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[AND1]] to <2 x i3> ; CHECK-NEXT: ret <2 x i3> [[AND]] ; %cmp0 = icmp eq <2 x i8> %i, zeroinitializer @@ -575,7 +571,7 @@ define i3 @or_of_bitcast_icmps_vec(<3 x i65> %i) { ; CHECK-LABEL: @or_of_bitcast_icmps_vec( -; CHECK-NEXT: ret i3 bitcast (<3 x i1> to 
i3) +; CHECK-NEXT: ret i3 -1 ; %cmp0 = icmp sge <3 x i65> %i, zeroinitializer %conv0 = bitcast <3 x i1> %cmp0 to i3 @@ -591,9 +587,8 @@ ; CHECK-LABEL: @or_of_different_cast_icmps( ; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i8 [[I:%.*]], 0 ; CHECK-NEXT: [[CONV0:%.*]] = zext i1 [[CMP0]] to i16 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[I]], 1 -; CHECK-NEXT: [[CONV1:%.*]] = sext i1 [[CMP1]] to i16 -; CHECK-NEXT: [[OR:%.*]] = or i16 [[CONV0]], [[CONV1]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[I]], 1 +; CHECK-NEXT: [[OR:%.*]] = select i1 [[CMP1]], i16 [[CONV0]], i16 -1 ; CHECK-NEXT: ret i16 [[OR]] ; %cmp0 = icmp ne i8 %i, 0 @@ -662,7 +657,7 @@ define i32 @test45(i32 %a, i32 %b) { ; CHECK-LABEL: @test45( ; CHECK-NEXT: [[NEGB:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[NEGB]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGB]], [[A:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -676,7 +671,7 @@ define i32 @test45_commuted_and(i32 %a, i32 %b) { ; CHECK-LABEL: @test45_commuted_and( ; CHECK-NEXT: [[NEGB:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[NEGB]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGB]], [[A:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -693,7 +688,7 @@ define i32 @test46(i32 %a, i32 %b) { ; CHECK-LABEL: @test46( ; CHECK-NEXT: [[NEGB:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[NEGB]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGB]], [[A:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -709,7 +704,7 @@ define i32 @test46_commuted_and(i32 %a, i32 %b) { ; CHECK-LABEL: @test46_commuted_and( ; CHECK-NEXT: [[NEGB:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[NEGB]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGB]], [[A:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -738,7 +733,7 @@ define i32 @test48(i32 %a, i32 %b) { ; CHECK-LABEL: @test48( ; CHECK-NEXT: [[NEGA:%.*]] = xor i32 
[[A:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[NEGA]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGA]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -751,7 +746,7 @@ define i32 @test49(i32 %a, i32 %b) { ; CHECK-LABEL: @test49( ; CHECK-NEXT: [[NEGA:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[NEGA]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGA]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -790,7 +785,7 @@ define i32 @test52(i32 %a, i32 %b) { ; CHECK-LABEL: @test52( ; CHECK-NEXT: [[NEGA:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[NEGA]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGA]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -803,7 +798,7 @@ define i32 @test53(i32 %a, i32 %b) { ; CHECK-LABEL: @test53( ; CHECK-NEXT: [[NEGA:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[NEGA]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGA]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -850,7 +845,7 @@ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]] ; CHECK-NEXT: [[XNOR:%.*]] = xor i32 [[XOR]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[XNOR]], [[AND]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], [[XNOR]] ; CHECK-NEXT: ret i32 [[OR]] ; %and = and i32 %a, %b @@ -884,7 +879,7 @@ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]] ; CHECK-NEXT: [[XNOR:%.*]] = xor i32 [[XOR]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[XNOR]], [[AND]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], [[XNOR]] ; CHECK-NEXT: ret i32 [[OR]] ; %and = and i32 %b, %a @@ -917,7 +912,7 @@ define i8 @lshr_undersized_mask(i8 %x) { ; CHECK-LABEL: @lshr_undersized_mask( ; CHECK-NEXT: [[SH:%.*]] = lshr i8 [[X:%.*]], 5 -; CHECK-NEXT: [[MASK:%.*]] = and i8 [[SH]], -2 +; CHECK-NEXT: [[MASK:%.*]] = and i8 [[SH]], 6 ; 
CHECK-NEXT: ret i8 [[MASK]] ; %sh = lshr i8 %x, 5 @@ -948,7 +943,7 @@ define <2 x i8> @shl_undersized_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @shl_undersized_mask_splat( ; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i8> [[SH]], +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i8> [[SH]], ; CHECK-NEXT: ret <2 x i8> [[MASK]] ; %sh = shl <2 x i8> %x, @@ -981,7 +976,7 @@ define i64 @shl_or_and2(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and2( ; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP1]], 32 ; CHECK-NEXT: ret i64 [[TMP3]] ; %tmp1 = zext i1 %b to i64 @@ -1038,10 +1033,10 @@ define i32 @shl_or_and6(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and6( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -65535 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[B:%.*]], 1 +; CHECK-NEXT: [[TMP2_MASKED:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP2_MASKED]] ; CHECK-NEXT: ret i32 [[TMP5]] ; %tmp1 = zext i16 %a to i32 @@ -1056,10 +1051,8 @@ define i32 @shl_or_and7(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and7( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -131072 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP3]], -131072 ; CHECK-NEXT: ret i32 [[TMP5]] ; %tmp1 = zext i16 %a to i32 @@ -1076,8 +1069,8 @@ ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: 
[[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 131071 +; CHECK-NEXT: [[TMP3_MASKED:%.*]] = and i32 [[TMP3]], 65536 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3_MASKED]], [[TMP2]] ; CHECK-NEXT: ret i32 [[TMP5]] ; %tmp1 = zext i16 %a to i32 @@ -1104,7 +1097,7 @@ define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @shl_or_and2v( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP1]], ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %tmp1 = zext <2 x i1> %b to <2 x i64> @@ -1119,10 +1112,10 @@ ; A variation of above test case, but fails due to the mask value ; CHECK-LABEL: @shl_or_and3v( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[B:%.*]], +; CHECK-NEXT: [[TMP2_MASKED:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP2_MASKED]] ; CHECK-NEXT: ret <2 x i32> [[TMP5]] ; %tmp1 = zext <2 x i16> %a to <2 x i32> @@ -1132,3 +1125,34 @@ %tmp5 = and <2 x i32> %tmp4, ; mask with 0xFFFF0001 ret <2 x i32> %tmp5 } + +define i1 @icmp_set_and_unset_bits(i32 %a) { +; CHECK-LABEL: @icmp_set_and_unset_bits( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %tmp1 = and i32 %a, 1 + %tmp2 = icmp eq i32 %tmp1, 0 + %tmp3 = and i32 %a, 6 + %tmp4 = icmp ne i32 %tmp3, 0 + %tmp5 = and i1 %tmp2, %tmp4 + ret i1 %tmp5 +} + +define i1 @icmp_set_and_unset_bits_recursive(i32 %a) { +; CHECK-LABEL: @icmp_set_and_unset_bits_recursive( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 
[[A:%.*]], 199 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 134 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %tmp1 = and i32 %a, 1 + %tmp2 = icmp eq i32 %tmp1, 0 + %tmp3 = and i32 %a, 6 + %tmp4 = icmp ne i32 %tmp3, 0 + %tmp5 = and i32 %a, 192 + %tmp6 = icmp ne i32 %tmp5, 128 + %tmp7 = and i1 %tmp2, %tmp4 + %tmp8 = and i1 %tmp6, %tmp7 + ret i1 %tmp8 +}