Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -205,9 +205,9 @@
 /// bits of A are cleared in B.
 /// Example: (icmp eq (A & 3), 0) -> Mask_AllZeroes
 ///
-/// "Mixed" declares that (A & B) == C and C might or might not contain any
-/// number of one bits and zero bits.
-/// Example: (icmp eq (A & 3), 1) -> AMask_Mixed
+/// "SingleBit" declares that (A & B) == C and B has exactly one bit set.
+/// "NotSingleBit" declares that (A & B) != C and B has exactly one bit set.
+/// (There is no A variant because constants are normalized to the right.)
 ///
 /// "Not" means that in above descriptions "==" should be replaced by "!=".
 /// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
@@ -222,10 +222,8 @@
   BMask_NotAllOnes = 8,
   Mask_AllZeros = 16,
   Mask_NotAllZeros = 32,
-  AMask_Mixed = 64,
-  AMask_NotMixed = 128,
-  BMask_Mixed = 256,
-  BMask_NotMixed = 512
+  BMask_SingleBit = 64,
+  BMask_NotSingleBit = 128,
 };
 
 /// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C)
@@ -241,37 +239,29 @@
   unsigned MaskVal = 0;
   if (CCst && CCst->isZero()) {
     // if C is zero, then both A and B qualify as mask
-    MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed)
-                     : (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed));
+    MaskVal |= (IsEq ? Mask_AllZeros : Mask_NotAllZeros);
     if (IsAPow2)
-      MaskVal |= (IsEq ? (AMask_NotAllOnes | AMask_NotMixed)
-                       : (AMask_AllOnes | AMask_Mixed));
+      MaskVal |= (IsEq ? AMask_NotAllOnes : AMask_AllOnes);
     if (IsBPow2)
-      MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed)
-                       : (BMask_AllOnes | BMask_Mixed));
+      MaskVal |= (IsEq ? BMask_NotAllOnes : BMask_AllOnes);
     return MaskVal;
   }
 
   if (A == C) {
-    MaskVal |= (IsEq ? (AMask_AllOnes | AMask_Mixed)
-                     : (AMask_NotAllOnes | AMask_NotMixed));
+    MaskVal |= IsEq ? AMask_AllOnes : AMask_NotAllOnes;
     if (IsAPow2)
-      MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed)
-                       : (Mask_AllZeros | AMask_Mixed));
-  } else if (ACst && CCst && ConstantExpr::getAnd(ACst, CCst) == CCst) {
-    MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed);
+      MaskVal |= IsEq ? Mask_NotAllZeros : Mask_AllZeros;
   }
 
   if (B == C) {
-    MaskVal |= (IsEq ? (BMask_AllOnes | BMask_Mixed)
-                     : (BMask_NotAllOnes | BMask_NotMixed));
+    MaskVal |= IsEq ? BMask_AllOnes : BMask_NotAllOnes;
     if (IsBPow2)
-      MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed)
-                       : (Mask_AllZeros | BMask_Mixed));
-  } else if (BCst && CCst && ConstantExpr::getAnd(BCst, CCst) == CCst) {
-    MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
+      MaskVal |= IsEq ? Mask_NotAllZeros : Mask_AllZeros;
   }
 
+  if (IsBPow2)
+    MaskVal |= IsEq ? BMask_SingleBit : BMask_NotSingleBit;
+
   return MaskVal;
 }
 
@@ -279,14 +269,14 @@
 /// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
 /// is adjacent to the corresponding normal flag (recording ==), this just
 /// involves swapping those bits over.
+/// SingleBit masks are not included here, because conjugating them would imply
+/// that the other mask must be a subset or itself a single bit.
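+/// For example, Mask_AllZeros swaps with Mask_NotAllZeros, while the
+/// BMask_SingleBit / BMask_NotSingleBit bits are simply dropped.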
 static unsigned conjugateICmpMask(unsigned Mask) {
   unsigned NewMask;
-  NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros |
-                     AMask_Mixed | BMask_Mixed))
+  NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros))
             << 1;
 
-  NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros |
-                      AMask_NotMixed | BMask_NotMixed))
+  NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros))
              >> 1;
 
   return NewMask;
@@ -304,23 +294,18 @@
   return true;
 }
 
-/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
+/// Handle (icmp(A & B) ==/!= C) &/| (icmp(D & E) ==/!= F).
 /// Return the pattern classes (from MaskedICmpType) for the left hand side and
 /// the right hand side as a pair.
 /// LHS and RHS are the left hand side and the right hand side ICmps and PredL
 /// and PredR are their predicates, respectively.
-static
-Optional<std::pair<unsigned, unsigned>>
-getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
-                         Value *&D, Value *&E, ICmpInst *LHS,
-                         ICmpInst *RHS,
+static void getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
+                                     Value *&D, Value *&E, Value *&F,
+                                     bool *DIsA,
+                                     unsigned *LeftType, unsigned *RightType,
+                                     ICmpInst *LHS, ICmpInst *RHS,
                          ICmpInst::Predicate &PredL,
                          ICmpInst::Predicate &PredR) {
-  // vectors are not (yet?) supported. Don't support pointers either.
-  if (!LHS->getOperand(0)->getType()->isIntegerTy() ||
-      !RHS->getOperand(0)->getType()->isIntegerTy())
-    return None;
-
   // Here comes the tricky part:
   // LHS might be of the form L11 & L12 == X, X == L21 & L22,
   // and L11 & L12 == L21 & L22. The same goes for RHS.
@@ -334,304 +319,193 @@
   if (decomposeBitTestICmp(L1, L2, PredL, L11, L12, L2)) {
     L21 = L22 = L1 = nullptr;
   } else {
+    // If we cannot satisfy DIsA, then we prefer A or B to be a constant.
+    // If we do satisfy DIsA, then A, B and C will get overwritten when we
+    // check against D.
+    bool GotLConst = false;
+    // Look for ANDs in the LHS icmp.
     if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
       // Any icmp can be viewed as being trivially masked; if it allows us to
       // remove one, it's worth it.
       L11 = L1;
       L12 = Constant::getAllOnesValue(L1->getType());
+    } else {
+      // Normalize by putting the constant (if any) on the right
+      if (dyn_cast<ConstantInt>(L11)) {
+        A = L12;
+        B = L11;
+        C = L2;
+        GotLConst = true;
+      } else {
+        A = L11;
+        B = L12;
+        C = L2;
+        if (dyn_cast<ConstantInt>(L12))
+          GotLConst = true;
+      }
     }
 
     if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
       L21 = L2;
       L22 = Constant::getAllOnesValue(L2->getType());
+    } else if (!GotLConst) {
+      // Normalize by putting the constant (if any) on the right
+      if (dyn_cast<ConstantInt>(L21)) {
+        A = L22;
+        B = L21;
+        C = L1;
+      } else {
+        A = L21;
+        B = L22;
+        C = L1;
+      }
     }
   }
 
-  // Bail if LHS was a icmp that can't be decomposed into an equality.
-  if (!ICmpInst::isEquality(PredL))
-    return None;
-
   Value *R1 = RHS->getOperand(0);
   Value *R2 = RHS->getOperand(1);
-  Value *R11, *R12;
-  bool Ok = false;
+  Value *R11, *R12, *R21, *R22;
+  bool GotRConst = false;
   if (decomposeBitTestICmp(R1, R2, PredR, R11, R12, R2)) {
     if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
-      A = R11;
-      D = R12;
-    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
-      A = R12;
       D = R11;
+      E = R12;
+      *DIsA = true;
+    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+      D = R12;
+      E = R11;
+      *DIsA = true;
     } else {
-      return None;
+      // Normalize by putting the constant (if any) on the right
+      if (dyn_cast<ConstantInt>(R11)) {
+        D = R12;
+        E = R11;
+        GotRConst = true;
+      } else {
+        D = R11;
+        E = R12;
+        if (dyn_cast<ConstantInt>(R12))
+          GotRConst = true;
+      }
     }
-    E = R2;
+    F = R2;
     R1 = nullptr;
-    Ok = true;
   } else {
     if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
       // As before, model no mask as a trivial mask if it'll let us do an
       // optimization.
       R11 = R1;
       R12 = Constant::getAllOnesValue(R1->getType());
+    } else if (!GotRConst) {
+      // Normalize by putting the constant (if any) on the right
+      if (dyn_cast<ConstantInt>(R11)) {
+        D = R12;
+        E = R11;
+        F = R2;
+        GotRConst = true;
+      } else {
+        D = R11;
+        E = R12;
+        F = R2;
+        if (dyn_cast<ConstantInt>(R12))
+          GotRConst = true;
+      }
     }
 
     if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
-      A = R11;
-      D = R12;
-      E = R2;
-      Ok = true;
-    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
-      A = R12;
       D = R11;
+      E = R12;
+      F = R2;
+      *DIsA = true;
+    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+      D = R12;
+      E = R11;
+      F = R2;
+      *DIsA = true;
     }
   }
 
-  // Bail if RHS was a icmp that can't be decomposed into an equality.
-  if (!ICmpInst::isEquality(PredR))
-    return None;
-
   // Look for ANDs on the right side of the RHS icmp.
-  if (!Ok) {
-    if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
-      R11 = R2;
-      R12 = Constant::getAllOnesValue(R2->getType());
+  if (!*DIsA) {
+    if (!match(R2, m_And(m_Value(R21), m_Value(R22)))) {
+      R21 = R2;
+      R22 = Constant::getAllOnesValue(R2->getType());
+    } else if (!GotRConst) {
+      // Normalize by putting the constant (if any) on the right
+      if (dyn_cast<ConstantInt>(R21)) {
+        D = R22;
+        E = R21;
+        F = R1;
+      } else {
+        D = R21;
+        E = R22;
+        F = R1;
+      }
     }
 
-    if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
-      A = R11;
-      D = R12;
-      E = R1;
-      Ok = true;
-    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
-      A = R12;
-      D = R11;
-      E = R1;
-      Ok = true;
-    } else {
-      return None;
+    if (R21 == L11 || R21 == L12 || R21 == L21 || R21 == L22) {
+      D = R21;
+      E = R22;
+      F = R1;
+      *DIsA = true;
+    } else if (R22 == L11 || R22 == L12 || R22 == L21 || R22 == L22) {
+      D = R22;
+      E = R21;
+      F = R1;
+      *DIsA = true;
     }
   }
 
-  if (!Ok)
-    return None;
-
-  if (L11 == A) {
+  if (L11 == D) {
+    A = D;
     B = L12;
     C = L2;
-  } else if (L12 == A) {
+  } else if (L12 == D) {
+    A = D;
     B = L11;
     C = L2;
-  } else if (L21 == A) {
+  } else if (L21 == D) {
+    A = D;
     B = L22;
     C = L1;
-  } else if (L22 == A) {
+  } else if (L22 == D) {
+    A = D;
     B = L21;
     C = L1;
   }
 
-  unsigned LeftType = getMaskedICmpType(A, B, C, PredL);
-  unsigned RightType = getMaskedICmpType(A, D, E, PredR);
-  return Optional<std::pair<unsigned, unsigned>>(
-      std::make_pair(LeftType, RightType));
+  *LeftType = getMaskedICmpType(A, B, C, PredL);
+  *RightType = getMaskedICmpType(D, E, F, PredR);
 }
 
-/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
-/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
-/// and the right hand side is of type BMask_Mixed. For example,
-/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
-static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
-    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
-    Value *A, Value *B, Value *C, Value *D, Value *E,
-    ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
-    llvm::InstCombiner::BuilderTy &Builder) {
-  // We are given the canonical form:
-  //   (icmp ne (A & B), 0) & (icmp eq (A & D), E).
-  // where D & E == E.
-  //
-  // If IsAnd is false, we get it in negated form:
-  //   (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
-  //     !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
-  //
-  // We currently handle the case of B, C, D, E are constant.
-  //
-  ConstantInt *BCst = dyn_cast<ConstantInt>(B);
-  if (!BCst)
-    return nullptr;
-  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
-  if (!CCst)
-    return nullptr;
-  ConstantInt *DCst = dyn_cast<ConstantInt>(D);
-  if (!DCst)
-    return nullptr;
-  ConstantInt *ECst = dyn_cast<ConstantInt>(E);
-  if (!ECst)
-    return nullptr;
-
-  ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
-
-  // Update E to the canonical form when D is a power of two and RHS is
-  // canonicalized as,
-  //   (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
-  //   (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
-  if (PredR != NewCC)
-    ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
-
-  // If B or D is zero, skip because if LHS or RHS can be trivially folded by
-  // other folding rules and this pattern won't apply any more.
-  if (BCst->getValue() == 0 || DCst->getValue() == 0)
-    return nullptr;
-
-  // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't
-  // deduce anything from it.
-  // For example,
-  //   (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
-  if ((BCst->getValue() & DCst->getValue()) == 0)
-    return nullptr;
-
-  // If the following two conditions are met:
-  //
-  // 1. mask B covers only a single bit that's not covered by mask D, that is,
-  // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of
-  // B and D has only one bit set) and,
-  //
-  // 2. RHS (and E) indicates that the rest of B's bits are zero (in other
-  // words, the intersection of B and D is zero), that is, ((B & D) & E) == 0
-  //
-  // then that single bit in B must be one and thus the whole expression can be
-  // folded to
-  //   (A & (B | D)) == (B & (B ^ D)) | E.
-  //
-  // For example,
-  //   (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
-  //   (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8)
-  if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) &&
-      (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) {
-    APInt BorD = BCst->getValue() | DCst->getValue();
-    APInt BandBxorDorE = (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) |
-        ECst->getValue();
-    Value *NewMask = ConstantInt::get(BCst->getType(), BorD);
-    Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE);
-    Value *NewAnd = Builder.CreateAnd(A, NewMask);
-    return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue);
-  }
-
-  auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
-    return (C1->getValue() & C2->getValue()) == C1->getValue();
-  };
-  auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
-    return (C1->getValue() & C2->getValue()) == C2->getValue();
-  };
-
-  // In the following, we consider only the cases where B is a superset of D, B
-  // is a subset of D, or B == D because otherwise there's at least one bit
-  // covered by B but not D, in which case we can't deduce much from it, so
-  // no folding (aside from the single must-be-one bit case right above.)
-  // For example,
-  //   (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding.
-  if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst))
-    return nullptr;
-
-  // At this point, either B is a superset of D, B is a subset of D or B == D.
-
-  // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict
-  // and the whole expression becomes false (or true if negated), otherwise, no
-  // folding.
-  // For example,
-  //   (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false.
-  //   (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding.
-  if (ECst->isZero()) {
-    if (IsSubSetOrEqual(BCst, DCst))
-      return ConstantInt::get(LHS->getType(), !IsAnd);
-    return nullptr;
-  }
-
-  // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B ==
-  // D. If B is a superset of (or equal to) D, since E is not zero, LHS is
-  // subsumed by RHS (RHS implies LHS.) So the whole expression becomes
-  // RHS. For example,
-  //   (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
-  //   (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
-  if (IsSuperSetOrEqual(BCst, DCst))
-    return RHS;
-  // Otherwise, B is a subset of D. If B and E have a common bit set,
-  // ie. (B & E) != 0, then LHS is subsumed by RHS. For example.
-  //   (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
-  assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code");
-  if ((BCst->getValue() & ECst->getValue()) != 0)
-    return RHS;
-  // Otherwise, LHS and RHS contradict and the whole expression becomes false
-  // (or true if negated.) For example,
-  //   (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false.
-  //   (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false.
-  return ConstantInt::get(LHS->getType(), !IsAnd);
-}
-
-/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single
-/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side
-/// aren't of the common mask pattern type.
-static Value *foldLogOpOfMaskedICmpsAsymmetric(
-    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
-    Value *A, Value *B, Value *C, Value *D, Value *E,
-    ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
-    unsigned LHSMask, unsigned RHSMask,
-    llvm::InstCombiner::BuilderTy &Builder) {
-  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
-         "Expected equality predicates for masked type of icmps.");
-  // Handle Mask_NotAllZeros-BMask_Mixed cases.
-  // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or
-  // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E)
-  // which gets swapped to
-  //   (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C).
-  if (!IsAnd) {
-    LHSMask = conjugateICmpMask(LHSMask);
-    RHSMask = conjugateICmpMask(RHSMask);
-  }
-  if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) {
-    if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
-            LHS, RHS, IsAnd, A, B, C, D, E,
-            PredL, PredR, Builder)) {
-      return V;
-    }
-  } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) {
-    if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
-            RHS, LHS, IsAnd, A, D, E, B, C,
-            PredR, PredL, Builder)) {
-      return V;
-    }
-  }
-  return nullptr;
-}
-
-/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// into a single (icmp(A & X) ==/!= Y).
+/// Try to fold (icmp(_ & _) ==/!= _) &/| _
 static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
                                      llvm::InstCombiner::BuilderTy &Builder) {
-  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
+  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr,
+        *F = nullptr;
+  // We only handle the case where both icmps are equality comparisons.
   ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
-  Optional<std::pair<unsigned, unsigned>> MaskPair =
-      getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR);
-  if (!MaskPair)
+  if (!(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR)))
     return nullptr;
-  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
-         "Expected equality predicates for masked type of icmps.");
-  unsigned LHSMask = MaskPair->first;
-  unsigned RHSMask = MaskPair->second;
-  unsigned Mask = LHSMask & RHSMask;
-  if (Mask == 0) {
-    // Even if the two sides don't share a common pattern, check if folding can
-    // still happen.
-    if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
-            LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
-            Builder))
-      return V;
+  bool LIsEq = (PredL == ICmpInst::ICMP_EQ);
+  bool RIsEq = (PredR == ICmpInst::ICMP_EQ);
+
+  // vectors are not (yet) supported. Don't support pointers either.
+  if (!LHS->getOperand(0)->getType()->isIntegerTy() ||
+      !RHS->getOperand(0)->getType()->isIntegerTy())
     return nullptr;
-  }
+
+  // Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & E) ==/!= F)
+  // into a single (icmp(A & X) ==/!= Y).
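+  // getMaskedTypeForICmpPair reports, via DIsA, whether both icmps test the
+  // same variable (in which case D == A); the folds below that need a shared
+  // variable are guarded on it.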
+  bool DIsA = false;
+  unsigned LHSMask;
+  unsigned RHSMask;
+  getMaskedTypeForICmpPair(A, B, C, D, E, F, &DIsA, &LHSMask, &RHSMask,
+                           LHS, RHS, PredL, PredR);
+  unsigned Mask = LHSMask & RHSMask;
 
   // In full generality:
-  //   (icmp (A & B) Op C) | (icmp (A & D) Op E)
-  // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
+  //   (icmp (A & B) Op C) | (icmp (D & E) Op F)
+  // == ![ (icmp (A & B) !Op C) & (icmp (D & E) !Op F) ]
   //
   // If the latter can be converted into (icmp (A & X) Op Y) then the former is
   // equivalent to (icmp (A & X) !Op Y).
@@ -648,7 +522,7 @@
     Mask = conjugateICmpMask(Mask);
   }
 
-  if (Mask & Mask_AllZeros) {
-    // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
-    // -> (icmp eq (A & (B|D)), 0)
-    Value *NewOr = Builder.CreateOr(B, D);
+  if (DIsA && (Mask & Mask_AllZeros)) {
+    // (icmp eq (A & B), 0) & (icmp eq (A & E), 0)
+    // -> (icmp eq (A & (B|E)), 0)
+    Value *NewOr = Builder.CreateOr(B, E);
@@ -659,92 +533,119 @@
     Value *Zero = Constant::getNullValue(A->getType());
     return Builder.CreateICmp(NewCC, NewAnd, Zero);
   }
-  if (Mask & BMask_AllOnes) {
-    // (icmp eq (A & B), B) & (icmp eq (A & D), D)
-    // -> (icmp eq (A & (B|D)), (B|D))
-    Value *NewOr = Builder.CreateOr(B, D);
+  if (DIsA && (Mask & BMask_AllOnes)) {
+    // (icmp eq (A & B), B) & (icmp eq (A & E), E)
+    // -> (icmp eq (A & (B|E)), (B|E))
+    Value *NewOr = Builder.CreateOr(B, E);
     Value *NewAnd = Builder.CreateAnd(A, NewOr);
     return Builder.CreateICmp(NewCC, NewAnd, NewOr);
   }
-  if (Mask & AMask_AllOnes) {
-    // (icmp eq (A & B), A) & (icmp eq (A & D), A)
-    // -> (icmp eq (A & (B&D)), A)
-    Value *NewAnd1 = Builder.CreateAnd(B, D);
+  if (DIsA && (Mask & AMask_AllOnes)) {
+    // (icmp eq (A & B), A) & (icmp eq (A & E), A)
+    // -> (icmp eq (A & (B&E)), A)
+    Value *NewAnd1 = Builder.CreateAnd(B, E);
     Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1);
     return Builder.CreateICmp(NewCC, NewAnd2, A);
   }
-
-  // Remaining cases assume at least that B and D are constant, and depend on
-  // their actual values. This isn't strictly necessary, just a "handle the
-  // easy cases for now" decision.
+  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+  ConstantInt *FCst = dyn_cast<ConstantInt>(F);
+  if (LIsEq && RIsEq && CCst && FCst &&
+      (CCst->getValue() & FCst->getValue()) == 0 &&
+      CCst->getValue() != 0 && FCst->getValue() != 0) {
+    // If C and F are disjoint and both comparisons are equalities:
+    // (icmp eq (A & B), C) & (icmp eq (D & E), F),
+    //   given (C & F) == 0 and C != 0 and F != 0
+    // -> (icmp eq ((A|D) & (B|E)), (C|F))
+    Value *NewA = Builder.CreateOr(A, D);
+    Value *NewB = Builder.CreateOr(B, E);
+    Value *NewAnd = Builder.CreateAnd(NewA, NewB);
+    return Builder.CreateICmp(
+        ICmpInst::ICMP_EQ, NewAnd,
+        ConstantInt::get(A->getContext(),
+                         CCst->getValue() | FCst->getValue()));
+  }
   ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+  ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+  if (BCst && ECst && LIsEq && RIsEq &&
+      (BCst->getValue() & ECst->getValue()) == 0) {
+    // If B and E are disjoint and both comparisons are equalities:
+    // (icmp eq (A & B), C) & (icmp eq (D & E), F), given (B & E) == 0
+    // -> (icmp eq ((A|D) & (B|E)), (C|F))
+    Value *NewA = Builder.CreateOr(A, D);
+    Value *NewAnd = Builder.CreateAnd(
+        NewA,
+        ConstantInt::get(A->getContext(),
                         BCst->getValue() | ECst->getValue()));
+    Value *NewC = Builder.CreateOr(C, F);
+    return Builder.CreateICmp(ICmpInst::ICMP_EQ, NewAnd, NewC);
+  }
+  if (BCst && !LIsEq && RIsEq && (LHSMask & BMask_NotSingleBit)) {
+    // Expand (icmp ne (A & 32), C)
+    //     -> (icmp eq (A & 32), C ^ 32)
+    // And then merge across with RHS, like above.
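+    // e.g. (icmp ne (A & 64), C) & (icmp eq (D & 20), F)
+    //   -> (icmp eq ((A|D) & 84), (C|F) ^ 64)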
+    Value *NewPreC = Builder.CreateOr(C, F);
+    Value *NewC = Builder.CreateXor(NewPreC, B);
+    Value *NewB = Builder.CreateOr(B, E);
+    Value *NewA = Builder.CreateOr(A, D);
+    Value *NewAnd = Builder.CreateAnd(NewA, NewB);
+    return Builder.CreateICmp(ICmpInst::ICMP_EQ, NewAnd, NewC);
+  }
+  if (ECst && LIsEq && !RIsEq && (RHSMask & BMask_NotSingleBit)) {
+    // Expand (icmp ne (D & 32), F)
+    //     -> (icmp eq (D & 32), F ^ 32)
+    // And then merge across with LHS, like above.
+    Value *NewPreC = Builder.CreateOr(C, F);
+    Value *NewC = Builder.CreateXor(NewPreC, E);
+    Value *NewB = Builder.CreateOr(B, E);
+    Value *NewA = Builder.CreateOr(A, D);
+    Value *NewAnd = Builder.CreateAnd(NewA, NewB);
+    return Builder.CreateICmp(ICmpInst::ICMP_EQ, NewAnd, NewC);
+  }
+  if (!LIsEq && !RIsEq &&
+      (LHSMask & BMask_NotSingleBit) && (RHSMask & BMask_NotSingleBit)) {
+    // Expand twice:
+    //   (icmp ne (A & 128), C)
+    //     -> (icmp eq (A & 128), C ^ 128)
+    //   (icmp ne (D & 32), F)
+    //     -> (icmp eq (D & 32), F ^ 32)
+    // And then merge across, similar to above.
+    Value *NewPreC = Builder.CreateOr(C, F);
+    Value *NewC = Builder.CreateXor(
+        NewPreC,
+        ConstantInt::get(B->getContext(),
                         BCst->getValue() | ECst->getValue()));
+    Value *NewB = Builder.CreateOr(B, E);
+    Value *NewA = Builder.CreateOr(A, D);
+    Value *NewAnd = Builder.CreateAnd(NewA, NewB);
+    return Builder.CreateICmp(ICmpInst::ICMP_EQ, NewAnd, NewC);
+  }
+
+  // Remaining cases assume at least that B and E are constant, and depend on
+  // their actual values. This is necessary in order to know how and if B and E
+  // intersect, i.e. (B & E).
   if (!BCst)
     return nullptr;
-  ConstantInt *DCst = dyn_cast<ConstantInt>(D);
-  if (!DCst)
+  if (!ECst)
     return nullptr;
 
-  if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
-    // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
-    // (icmp ne (A & B), B) & (icmp ne (A & D), D)
-    // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
+  if (DIsA && (Mask & (Mask_NotAllZeros | BMask_NotAllOnes))) {
+    // (icmp ne (A & B), 0) & (icmp ne (A & E), 0) and
+    // (icmp ne (A & B), B) & (icmp ne (A & E), E)
+    // -> (icmp ne (A & B), 0) or (icmp ne (A & E), 0)
     // Only valid if one of the masks is a superset of the other (check "B&D" is
-    // the same as either B or D).
-    APInt NewMask = BCst->getValue() & DCst->getValue();
+    // the same as either B or E).
+    APInt NewMask = BCst->getValue() & ECst->getValue();
 
     if (NewMask == BCst->getValue())
       return LHS;
-    else if (NewMask == DCst->getValue())
+    else if (NewMask == ECst->getValue())
      return RHS;
   }
 
-  if (Mask & AMask_NotAllOnes) {
-    // (icmp ne (A & B), B) & (icmp ne (A & D), D)
-    // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
-    // Only valid if one of the masks is a superset of the other (check "B|D" is
-    // the same as either B or D).
-    APInt NewMask = BCst->getValue() | DCst->getValue();
+  if (DIsA && (Mask & AMask_NotAllOnes)) {
+    // (icmp ne (A & B), B) & (icmp ne (A & E), E)
+    // -> (icmp ne (A & B), A) or (icmp ne (A & E), A)
+    // Only valid if one of the masks is a superset of the other (check "B|E" is
+    // the same as either B or E).
+    APInt NewMask = BCst->getValue() | ECst->getValue();
 
     if (NewMask == BCst->getValue())
       return LHS;
-    else if (NewMask == DCst->getValue())
+    else if (NewMask == ECst->getValue())
       return RHS;
   }
 
-  if (Mask & BMask_Mixed) {
-    // (icmp eq (A & B), C) & (icmp eq (A & D), E)
-    // We already know that B & C == C && D & E == E.
-    // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
-    // C and E, which are shared by both the mask B and the mask D, don't
-    // contradict, then we can transform to
-    //   -> (icmp eq (A & (B|D)), (C|E))
-    // Currently, we only handle the case of B, C, D, and E being constant.
-    // We can't simply use C and E because we might actually handle
-    //   (icmp ne (A & B), B) & (icmp eq (A & D), D)
-    // with B and D, having a single bit set.
-    ConstantInt *CCst = dyn_cast<ConstantInt>(C);
-    if (!CCst)
-      return nullptr;
-    ConstantInt *ECst = dyn_cast<ConstantInt>(E);
-    if (!ECst)
-      return nullptr;
-    if (PredL != NewCC)
-      CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
-    if (PredR != NewCC)
-      ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
-
-    // If there is a conflict, we should actually return a false for the
-    // whole construct.
-    if (((BCst->getValue() & DCst->getValue()) &
-         (CCst->getValue() ^ ECst->getValue())).getBoolValue())
-      return ConstantInt::get(LHS->getType(), !IsAnd);
-
-    Value *NewOr1 = Builder.CreateOr(B, D);
-    Value *NewOr2 = ConstantExpr::getOr(CCst, ECst);
-    Value *NewAnd = Builder.CreateAnd(A, NewOr1);
-    return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
-  }
-
   return nullptr;
 }
 
@@ -1046,7 +947,7 @@
     }
   }
 
-  // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
+  // handle: (icmp eq/ne (A & B), C) & D
   if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder))
     return V;
 
Index: test/Transforms/InstCombine/and-or-icmps.ll
===================================================================
--- test/Transforms/InstCombine/and-or-icmps.ll
+++ test/Transforms/InstCombine/and-or-icmps.ll
@@ -3,7 +3,7 @@
 define i1 @PR1817_1(i32 %X) {
 ; CHECK-LABEL: @PR1817_1(
-; CHECK-NEXT:    [[B:%.*]] = icmp ult i32 %X, 10
+; CHECK-NEXT:    [[B:%.*]] = icmp ult i32 [[X:%.*]], 10
 ; CHECK-NEXT:    ret i1 [[B]]
 ;
   %A = icmp slt i32 %X, 10
@@ -14,7 +14,7 @@
 define i1 @PR1817_2(i32 %X) {
 ; CHECK-LABEL: @PR1817_2(
-; CHECK-NEXT:    [[A:%.*]] = icmp slt i32 %X, 10
+; CHECK-NEXT:    [[A:%.*]] = icmp slt i32 [[X:%.*]], 10
 ; CHECK-NEXT:    ret i1 [[A]]
 ;
   %A = icmp slt i32 %X, 10
@@ -25,7 +25,7 @@
 define i1 @PR2330(i32 %a, i32 %b) {
 ; CHECK-LABEL: @PR2330(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %b, %a
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -41,7 +41,7 @@
 define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -55,7 +55,7 @@
 define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants1(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 51
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -69,7 +69,7 @@
 define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 32
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -81,7 +81,7 @@
 define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i19 %x, 128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i19 [[X:%.*]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 193
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -95,7 +95,7 @@
 define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], -2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -107,7 +107,7 @@
 define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], -63
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -122,7 +122,7 @@
 define i1 @or_eq_with_diff_one(i8 %x) {
 ; CHECK-LABEL: @or_eq_with_diff_one(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 %x, -13
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -13
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -136,7 +136,7 @@
 define i1 @and_ne_with_diff_one(i32 %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, -39
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -39
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -151,7 +151,7 @@
 define i1 @or_eq_with_diff_one_signed(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_diff_one_signed(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -163,7 +163,7 @@
 define i1 @and_ne_with_diff_one_signed(i64 %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one_signed(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -177,7 +177,7 @@
 define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> %x, <i32 32, i32 32>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 32, i32 32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 97, i32 97>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -189,7 +189,7 @@
 define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> %x, <i32 -39, i32 -39>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -39, i32 -39>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -253,3 +253,95 @@
   ret void
 }
 
+define i1 @two_vars_both_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: @two_vars_both_eq(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 22
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 18
+; CHECK-NEXT:    ret i1 [[TMP3]]
+;
+  %tmp1 = and i32 %a, 20
+  %tmp2 = icmp eq i32 %tmp1, 16
+  %tmp3 = and i32 %b, 6
+  %tmp4 = icmp eq i32 %tmp3, 2
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+
+define i1 @four_vars_both_eq1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @four_vars_both_eq1(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 23
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  %tmp1 = and i32 %a, %b
+  %tmp2 = icmp eq i32 %tmp1, 16
+  %tmp3 = and i32 %c, %d
+  %tmp4 = icmp eq i32 %tmp3, 7
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+
+define i1 @four_vars_both_eq2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @four_vars_both_eq2(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 85
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  %tmp1 = and i32 %a, 20
+  %tmp2 = icmp eq i32 %tmp1, %b
+  %tmp3 = and i32 %c, 65
+  %tmp4 = icmp eq i32 %tmp3, %d
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+
+; This works because the ne gets normalized
+define i1 @two_vars_ne_single_bit(i32 %a, i32 %b) {
+; CHECK-LABEL: @two_vars_ne_single_bit(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 22
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 16
+; CHECK-NEXT:    ret i1 [[TMP3]]
+;
+  %tmp1 = and i32 %a, 20
+  %tmp2 = icmp eq i32 %tmp1, 16
+  %tmp3 = and i32 %b, 2
+  %tmp4 = icmp ne i32 %tmp3, 2
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+
+define i1 @four_vars_ne_single_bit(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @four_vars_ne_single_bit(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], 64
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 84
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %tmp1 = and i32 %a, 20
+  %tmp2 = icmp eq i32 %tmp1, %b
+  %tmp3 = and i32 %c, 64
+  %tmp4 = icmp ne i32 %tmp3, %d
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+
+define i1 @four_vars_two_ne_single_bit(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @four_vars_two_ne_single_bit(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], 68
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 68
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %tmp1 = and i32 %a, 4
+  %tmp2 = icmp ne i32 %tmp1, %b
+  %tmp3 = and i32 %c, 64
+  %tmp4 = icmp ne i32 %tmp3, %d
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}