Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -307,15 +307,17 @@ /// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E). /// Return the set of pattern classes (from MaskedICmpType) that both LHS and /// RHS satisfy. -static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, - Value *&D, Value *&E, ICmpInst *LHS, - ICmpInst *RHS, - ICmpInst::Predicate &PredL, - ICmpInst::Predicate &PredR) { +static +Optional> +getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, + Value *&D, Value *&E, ICmpInst *LHS, + ICmpInst *RHS, + ICmpInst::Predicate &PredL, + ICmpInst::Predicate &PredR) { // vectors are not (yet?) supported. Don't support pointers either. if (!LHS->getOperand(0)->getType()->isIntegerTy() || !RHS->getOperand(0)->getType()->isIntegerTy()) - return 0; + return None; // Here comes the tricky part: // LHS might be of the form L11 & L12 == X, X == L21 & L22, @@ -346,7 +348,7 @@ // Bail if LHS was a icmp that can't be decomposed into an equality. if (!ICmpInst::isEquality(PredL)) - return 0; + return None; Value *R1 = RHS->getOperand(0); Value *R2 = RHS->getOperand(1); @@ -360,7 +362,7 @@ A = R12; D = R11; } else { - return 0; + return None; } E = R2; R1 = nullptr; @@ -388,7 +390,7 @@ // Bail if RHS was a icmp that can't be decomposed into an equality. if (!ICmpInst::isEquality(PredR)) - return 0; + return None; // Look for ANDs on the right side of the RHS icmp. if (!Ok) { @@ -408,11 +410,11 @@ E = R1; Ok = true; } else { - return 0; + return None; } } if (!Ok) - return 0; + return None; if (L11 == A) { B = L12; @@ -430,7 +432,130 @@ unsigned LeftType = getMaskedICmpType(A, B, C, PredL); unsigned RightType = getMaskedICmpType(A, D, E, PredR); - return LeftType & RightType; + return Optional>(std::make_pair(LeftType, RightType)); +} + +static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( + ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, + Value *A, Value *B, Value *C, Value *D, Value *E, + ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, + unsigned LHSMask, unsigned RHSMask) { + // We are given the canonical form: + // (icmp ne (A & B), 0) && (icmp eq (A & D), E). + // where D & E == E. + // + // If IsAnd is false, we get it in negated form: + // (icmp eq (A & B), 0) || (icmp ne (A & D), E) -> + // !((icmp ne (A & B), 0) && (icmp eq (A & D), E)). + // + // We currently handle the case of B, C, D, E are constant. + // + ConstantInt *BCst = dyn_cast(B); + if (!BCst) + return nullptr; + ConstantInt *CCst = dyn_cast(C); + if (!CCst) + return nullptr; + ConstantInt *DCst = dyn_cast(D); + if (!DCst) + return nullptr; + ConstantInt *ECst = dyn_cast(E); + if (!ECst) + return nullptr; + + // Update E to the canonical form when D is a power of two and RHS is + // canonicalized as, + // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or + // (icmp ne (A & D), D) -> (icmp eq (A & D, 0). + if (PredR != (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) + ECst = cast(ConstantExpr::getXor(DCst, ECst)); + + // If B or D is zero, skip because if LHS or RHS can be trivially folded by + // other folding rules and this pattern won't apply any more. + if (BCst->getValue() == 0 || DCst->getValue() == 0) + return nullptr; + + // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't + // deduce anything from it. + // For example, + // (icmp ne (A & 12), 0) && (icmp eq (A & 3), 1) -> no folding. + if ((BCst->getValue() & DCst->getValue()) == 0) + return nullptr; + + // In the following, we consider only the cases where B is a superset of D, B + // is a subset of D, or B == D because otherwise there's at least one bit + // covered by B but not D, in which case we can't deduce anything from it, so + // no folding. + // For example, + // (icmp ne (A & 12), 0) && (icmp eq (A & 7), 1) -> no folding. + if ((BCst->getValue() & DCst->getValue()) != BCst->getValue() && + (BCst->getValue() & DCst->getValue()) != DCst->getValue()) + return nullptr; + + // At this point, either B is a superset of D, B is a subset of D or B == D. + + // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict + // and the whole expression becomes false (or true if negated), otherwise, no + // folding. + // For example, + // (icmp ne (A & 3), 0) && (icmp eq (A & 7), 0) -> false. + // (icmp ne (A & 15), 0) && (icmp eq (A & 7), 0) -> no folding. + if (ECst->isZero()) { + if ((BCst->getValue() & DCst->getValue()) == BCst->getValue()) + return ConstantInt::get(LHS->getType(), !IsAnd); + return nullptr; + } + + // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B == + // D. If B is a superset of (or equal to) D, since E is not zero, LHS is + // subsumed by RHS (RHS implies LHS.) So the whole expression becomes + // RHS. Otherwise, B is a subset of D. If B and E have a common bit set, + // ie. (B & E) != 0, then LHS is subsumed by RHS, otherwise, LHS and RHS + // contradict and the whole expression becomes false (or true if negated.) + // For example, + // (icmp ne (A & 255), 0) && (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). + // (icmp ne (A & 15), 0) && (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). + // (icmp ne (A & 12), 0) && (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). + // (icmp ne (A & 7), 0) && (icmp eq (A & 15), 8) -> false. + if ((BCst->getValue() & DCst->getValue()) == DCst->getValue()) + return RHS; + assert((BCst->getValue() & DCst->getValue()) == BCst->getValue() && + "Precondition due to above code"); + if ((BCst->getValue() & ECst->getValue()) != 0) + return RHS; + return ConstantInt::get(LHS->getType(), !IsAnd); +} + +static Value *foldLogOpOfMaskedICmpsAsymmetric( + ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, + Value *A, Value *B, Value *C, Value *D, Value *E, + ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, + unsigned LHSMask, unsigned RHSMask) { + assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && + "Expected equality predicates for masked type of icmps."); + // Handle Mask_NotAllZeros-BMask_Mixed cases. + // (icmp ne/eq (A & B), C) &&/|| (icmp eq/ne (A & D), E), or + // (icmp eq/ne (A & B), C) &&/|| (icmp ne/eq (A & D), E) + // which gets swapped to + // (icmp ne/eq (A & D), E) &&/|| (icmp eq/ne (A & B), C). + if (!IsAnd) { + LHSMask = conjugateICmpMask(LHSMask); + RHSMask = conjugateICmpMask(RHSMask); + } + if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) { + if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( + LHS, RHS, IsAnd, A, B, C, D, E, + PredL, PredR, LHSMask, RHSMask)) { + return V; + } + } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) { + if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( + RHS, LHS, IsAnd, A, D, E, B, C, + PredR, PredL, RHSMask, LHSMask)) { + return V; + } + } + return nullptr; } /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) @@ -439,13 +564,23 @@ llvm::InstCombiner::BuilderTy &Builder) { Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - unsigned Mask = + Optional> MaskPair = getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR); - if (Mask == 0) + if (!MaskPair) return nullptr; - assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && "Expected equality predicates for masked type of icmps."); + unsigned LHSMask = MaskPair->first; + unsigned RHSMask = MaskPair->second; + unsigned Mask = LHSMask & RHSMask; + if (Mask == 0) { + // Even if the two sides don't share a common pattern, check if folding can + // still happen. + if (Value *V = foldLogOpOfMaskedICmpsAsymmetric( + LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask)) + return V; + return nullptr; + } // In full generality: // (icmp (A & B) Op C) | (icmp (A & D) Op E) Index: test/Transforms/InstCombine/icmp-logical.ll =================================================================== --- test/Transforms/InstCombine/icmp-logical.ll +++ test/Transforms/InstCombine/icmp-logical.ll @@ -183,3 +183,260 @@ ret <2 x i1> %cmp } +; ((X & 12) != 0 && (X & 3) == 1) -> no change +define i1 @and_masked_icmps_mask_notallzeros_bmask_mixed_0(i32 %x) { +; CHECK-LABEL: @and_masked_icmps_mask_notallzeros_bmask_mixed_0( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 12 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[AND1]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i32 %x, 3 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[AND2]], 1 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[AND3]] +; + %1 = and i32 %x, 12 + %2 = icmp ne i32 %1, 0 + %3 = and i32 %x, 3 + %4 = icmp eq i32 %3, 1 + %5 = and i1 %2, %4 + ret i1 %5 +} + +; ((X & 12) != 0 && (X & 7) == 1) -> no change +define i1 @and_masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) { +; CHECK-LABEL: @and_masked_icmps_mask_notallzeros_bmask_mixed_1( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 12 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[AND1]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i32 %x, 7 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[AND2]], 1 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[AND3]] +; + %1 = and i32 %x, 12 + %2 = icmp ne i32 %1, 0 + %3 = and i32 %x, 7 + %4 = icmp eq i32 %3, 1 + %5 = and i1 %2, %4 + ret i1 %5 +} + +; ((X & 3) != 0 && (X & 7) == 0) -> false +define i1 @and_masked_icmps_mask_notallzeros_bmask_mixed_2(i32 %x) { +; CHECK-LABEL: @and_masked_icmps_mask_notallzeros_bmask_mixed_2( +; CHECK-NEXT: ret i1 false +; + %1 = and i32 %x, 3 + %2 = icmp ne i32 %1, 0 + %3 = and i32 %x, 7 + %4 = icmp eq i32 %3, 0 + %5 = and i1 %2, %4 + ret i1 %5 +} + +; ((X & 15) != 0 && (X & 7) == 0) -> no change +define i1 @and_masked_icmps_mask_notallzeros_bmask_mixed_3(i32 %x) { +; CHECK-LABEL: @and_masked_icmps_mask_notallzeros_bmask_mixed_3( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 15 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[AND1]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i32 %x, 7 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[AND2]], 0 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[AND3]] +; + %1 = and i32 %x, 15 + %2 = icmp ne i32 %1, 0 + %3 = and i32 %x, 7 + %4 = icmp eq i32 %3, 0 + %5 = and i1 %2, %4 + ret i1 %5 +} + +; ((X & 255) != 0 && (X & 15) == 8) -> (X & 15) == 8 +define i1 @and_masked_icmps_mask_notallzeros_bmask_mixed_4(i32 %x) { +; CHECK-LABEL: @and_masked_icmps_mask_notallzeros_bmask_mixed_4( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 15 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[AND1]], 8 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %1 = and i32 %x, 255 + %2 = icmp ne i32 %1, 0 + %3 = and i32 %x, 15 + %4 = icmp eq i32 %3, 8 + %5 = and i1 %2, %4 + ret i1 %5 +} + +; ((X & 15) != 0 && (X & 15) == 8) -> (X & 15) == 8 +define i1 @and_masked_icmps_mask_notallzeros_bmask_mixed_5(i32 %x) { +; CHECK-LABEL: @and_masked_icmps_mask_notallzeros_bmask_mixed_5( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 15 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[AND1]], 8 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %1 = and i32 %x, 15 + %2 = icmp ne i32 %1, 0 + %3 = and i32 %x, 15 + %4 = icmp eq i32 %3, 8 + %5 = and i1 %2, %4 + ret i1 %5 +} + +; ((X & 12) != 0 && (X & 15) == 8) -> (X & 15) == 8 +define i1 @and_masked_icmps_mask_notallzeros_bmask_mixed_6(i32 %x) { +; CHECK-LABEL: @and_masked_icmps_mask_notallzeros_bmask_mixed_6( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 15 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[AND1]], 8 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %1 = and i32 %x, 12 + %2 = icmp ne i32 %1, 0 + %3 = and i32 %x, 15 + %4 = icmp eq i32 %3, 8 + %5 = and i1 %2, %4 + ret i1 %5 +} + +; ((X & 7) != 0 && (X & 15) == 8) -> false +define i1 @and_masked_icmps_mask_notallzeros_bmask_mixed_7(i32 %x) { +; CHECK-LABEL: @and_masked_icmps_mask_notallzeros_bmask_mixed_7( +; CHECK-NEXT: ret i1 false +; + %1 = and i32 %x, 7 + %2 = icmp ne i32 %1, 0 + %3 = and i32 %x, 15 + %4 = icmp eq i32 %3, 8 + %5 = and i1 %2, %4 + ret i1 %5 +} + +; ((X & 12) == 0 || (X & 3) != 1) -> !((X & 12) != 0 && (X & 3) == 1)) -> +; no change +define i1 @or_masked_icmps_mask_notallzeros_bmask_mixed_0(i32 %x) { +; CHECK-LABEL: @or_masked_icmps_mask_notallzeros_bmask_mixed_0( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 12 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[AND1]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i32 %x, 3 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[AND2]], 1 +; CHECK-NEXT: [[OR1:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[OR1]] +; + %1 = and i32 %x, 12 + %2 = icmp eq i32 %1, 0 + %3 = and i32 %x, 3 + %4 = icmp ne i32 %3, 1 + %5 = or i1 %2, %4 + ret i1 %5 +} + +; ((X & 12) == 0 || (X & 7) != 1) -> !((X & 12) != 0 && (X & 7) == 1) -> +; no change. +define i1 @or_masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) { +; CHECK-LABEL: @or_masked_icmps_mask_notallzeros_bmask_mixed_1( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 12 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[AND1]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i32 %x, 7 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[AND2]], 1 +; CHECK-NEXT: [[OR1:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[OR1]] +; + %1 = and i32 %x, 12 + %2 = icmp eq i32 %1, 0 + %3 = and i32 %x, 7 + %4 = icmp ne i32 %3, 1 + %5 = or i1 %2, %4 + ret i1 %5 +} + +; ((X & 3) == 0 || (X & 7) != 0) -> !((X & 3) != 0 && (X & 7) == 0) -> +; !(false) -> true +define i1 @or_masked_icmps_mask_notallzeros_bmask_mixed_2(i32 %x) { +; CHECK-LABEL: @or_masked_icmps_mask_notallzeros_bmask_mixed_2( +; CHECK-NEXT: ret i1 true +; + %1 = and i32 %x, 3 + %2 = icmp eq i32 %1, 0 + %3 = and i32 %x, 7 + %4 = icmp ne i32 %3, 0 + %5 = or i1 %2, %4 + ret i1 %5 +} + +; ((X & 15) == 0 || (X & 7) != 0) -> !((X & 15) != 0 && (X & 7) == 0) -> +; no change. +define i1 @or_masked_icmps_mask_notallzeros_bmask_mixed_3(i32 %x) { +; CHECK-LABEL: @or_masked_icmps_mask_notallzeros_bmask_mixed_3( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 15 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[AND1]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i32 %x, 7 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[AND2]], 0 +; CHECK-NEXT: [[OR1:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[OR1]] +; + %1 = and i32 %x, 15 + %2 = icmp eq i32 %1, 0 + %3 = and i32 %x, 7 + %4 = icmp ne i32 %3, 0 + %5 = or i1 %2, %4 + ret i1 %5 +} + +; ((X & 255) == 0 || (X & 15) != 8) -> !(((X & 255) != 0 && (X & 15) == 8)) -> +; !((X & 15) == 8) -> ((X & 15) != 8) +define i1 @or_masked_icmps_mask_notallzeros_bmask_mixed_4(i32 %x) { +; CHECK-LABEL: @or_masked_icmps_mask_notallzeros_bmask_mixed_4( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 15 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[AND1]], 8 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %1 = and i32 %x, 255 + %2 = icmp eq i32 %1, 0 + %3 = and i32 %x, 15 + %4 = icmp ne i32 %3, 8 + %5 = or i1 %2, %4 + ret i1 %5 +} + +; ((X & 15) == 0 || (X & 15) != 8) -> !(((X & 15) != 0 && (X & 15) == 8)) -> +; !((X & 15) == 8) -> ((X & 15) != 8) +define i1 @or_masked_icmps_mask_notallzeros_bmask_mixed_5(i32 %x) { +; CHECK-LABEL: @or_masked_icmps_mask_notallzeros_bmask_mixed_5( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 15 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[AND1]], 8 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %1 = and i32 %x, 15 + %2 = icmp eq i32 %1, 0 + %3 = and i32 %x, 15 + %4 = icmp ne i32 %3, 8 + %5 = or i1 %2, %4 + ret i1 %5 +} + +; ((X & 12) == 0 || (X & 15) != 8) -> !(((X & 12) != 0 && (X & 15) == 8)) -> +; !((X & 15) == 8) -> ((X & 15) != 8 +define i1 @or_masked_icmps_mask_notallzeros_bmask_mixed_6(i32 %x) { +; CHECK-LABEL: @or_masked_icmps_mask_notallzeros_bmask_mixed_6( +; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 15 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[AND1]], 8 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %1 = and i32 %x, 12 + %2 = icmp eq i32 %1, 0 + %3 = and i32 %x, 15 + %4 = icmp ne i32 %3, 8 + %5 = or i1 %2, %4 + ret i1 %5 +} + +; ((X & 7) == 0 || (X & 15) != 8) -> !(((X & 7) != 0 && (X & 15) == 8)) -> +; !(false) -> true +define i1 @or_masked_icmps_mask_notallzeros_bmask_mixed_7(i32 %x) { +; CHECK-LABEL: @or_masked_icmps_mask_notallzeros_bmask_mixed_7( +; CHECK-NEXT: ret i1 true +; + %1 = and i32 %x, 7 + %2 = icmp eq i32 %1, 0 + %3 = and i32 %x, 15 + %4 = icmp ne i32 %3, 8 + %5 = or i1 %2, %4 + ret i1 %5 +}