Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -70,6 +70,111 @@ return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); } +/// If one of the constants is zero (we know they can't both be) and we have an +/// icmp instruction with zero, and we have an 'and' with the non-constant value +/// and a power of two we can turn the select into a shift on the result of the +/// 'and'. +/// This folds: +/// select (icmp eq (and X, C1), 0), C2, C3 +/// iff C1 is a power of 2 and the difference between C2 and C3 is a power of 2. +/// To something like: +/// (shr (and (X, C1)), (log2(C1) - log2(C2-C3))) + C3 +/// Or: +/// (shl (and (X, C1)), (log2(C2-C3) - log2(C1))) + C3 +/// With some variations depending on whether C3 is larger than C2, or the shift +/// isn't needed, or the bit widths don't match. +static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC, + APInt TrueVal, APInt FalseVal, + InstCombiner::BuilderTy &Builder) { + assert(SelType->isIntOrIntVectorTy() && "Not an integer select?"); + + // If this is a vector select, we need a vector compare. 
+ if (SelType->isVectorTy() != IC->getType()->isVectorTy()) + return nullptr; + + Value *V; + APInt AndMask; + bool CreateAnd = false; + ICmpInst::Predicate Pred = IC->getPredicate(); + if (ICmpInst::isEquality(Pred)) { + if (!match(IC->getOperand(1), m_Zero())) + return nullptr; + + V = IC->getOperand(0); + + const APInt *AndRHS; + if (!match(V, m_And(m_Value(), m_Power2(AndRHS)))) + return nullptr; + + AndMask = *AndRHS; + } else if (decomposeBitTestICmp(IC->getOperand(0), IC->getOperand(1), + Pred, V, AndMask)) { + assert(ICmpInst::isEquality(Pred) && "Not equality test?"); + + if (!AndMask.isPowerOf2()) + return nullptr; + + CreateAnd = true; + } else { + return nullptr; + } + + // If both select arms are non-zero see if we have a select of the form + // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic + // for 'x ? 2^n : 0' and fix the thing up at the end. + APInt Offset(TrueVal.getBitWidth(), 0); + if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) { + if ((TrueVal - FalseVal).isPowerOf2()) + Offset = FalseVal; + else if ((FalseVal - TrueVal).isPowerOf2()) + Offset = TrueVal; + else + return nullptr; + + // Adjust TrueVal and FalseVal to the offset. + TrueVal -= Offset; + FalseVal -= Offset; + } + + // Make sure one of the select arms is a power of 2. + if (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()) + return nullptr; + + // Determine which shift is needed to transform result of the 'and' into the + // desired result. + const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal; + unsigned ValZeros = ValC.logBase2(); + unsigned AndZeros = AndMask.logBase2(); + + if (CreateAnd) { + // Materialize the decomposed bit test as an explicit 'and' with the mask. + V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), AndMask)); + } + + // If types don't match we can still convert the select by introducing a zext + // or a trunc of the 'and'. 
+ if (ValZeros > AndZeros) { + V = Builder.CreateZExtOrTrunc(V, SelType); + V = Builder.CreateShl(V, ValZeros - AndZeros); + } else if (ValZeros < AndZeros) { + V = Builder.CreateLShr(V, AndZeros - ValZeros); + V = Builder.CreateZExtOrTrunc(V, SelType); + } else + V = Builder.CreateZExtOrTrunc(V, SelType); + + // Okay, now we know that everything is set up, we just don't know whether we + // have an icmp_ne or icmp_eq and whether the true or false val is the zero. + bool ShouldNotVal = !TrueVal.isNullValue(); + ShouldNotVal ^= Pred == ICmpInst::ICMP_NE; + if (ShouldNotVal) + V = Builder.CreateXor(V, ValC); + + // Apply an offset if needed. + if (!Offset.isNullValue()) + V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset)); + return V; +} + /// We want to turn code that looks like this: /// %C = or %A, %B /// %D = select %cond, %C, %A @@ -101,7 +206,7 @@ /// For the same transformation as the previous function, return the identity /// constant that goes into the select. -static Constant *getSelectFoldableConstant(BinaryOperator *I) { +static APInt getSelectFoldableConstant(BinaryOperator *I) { switch (I->getOpcode()) { default: llvm_unreachable("This cannot happen!"); case Instruction::Add: @@ -111,11 +216,11 @@ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - return Constant::getNullValue(I->getType()); + return APInt::getNullValue(I->getType()->getScalarSizeInBits()); case Instruction::And: - return Constant::getAllOnesValue(I->getType()); + return APInt::getAllOnesValue(I->getType()->getScalarSizeInBits()); case Instruction::Mul: - return ConstantInt::get(I->getType(), 1); + return APInt(I->getType()->getScalarSizeInBits(), 1); } } @@ -219,16 +324,11 @@ return BinaryOperator::Create(BO->getOpcode(), Op0, Op1); } -static bool isSelect01(Constant *C1, Constant *C2) { - const APInt *C1I, *C2I; - if (!match(C1, m_APInt(C1I))) - return false; - if (!match(C2, m_APInt(C2I))) +static bool isSelect01(const APInt &C1I, const APInt &C2I) 
{ + if (!C1I.isNullValue() && !C2I.isNullValue()) // One side must be zero. return false; - if (!C1I->isNullValue() && !C2I->isNullValue()) // One side must be zero. - return false; - return C1I->isOneValue() || C1I->isAllOnesValue() || - C2I->isOneValue() || C2I->isAllOnesValue(); + return C1I.isOneValue() || C1I.isAllOnesValue() || + C2I.isOneValue() || C2I.isAllOnesValue(); } /// Try to fold the select into one of the operands to allow further @@ -248,11 +348,14 @@ } if (OpToFold) { - Constant *C = getSelectFoldableConstant(TVI); + APInt CI = getSelectFoldableConstant(TVI); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. - if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { + const APInt *OOpC; + bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); + if (!isa<Constant>(OOp) || (OOpIsAPInt && isSelect01(CI, *OOpC))) { + Value *C = ConstantInt::get(OOp->getType(), CI); Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), @@ -260,6 +363,20 @@ BO->copyIRFlags(TVI); return BO; } + // OOp is a constant matched by m_APInt. If the condition is a bit test, + // the selected constant may be computable directly from the tested bit. + // TODO: Not handling shifts for now. + if (OOpIsAPInt && !TVI->isShift()) { + if (auto *ICI = dyn_cast<ICmpInst>(SI.getCondition())) { + if (Value *V = foldSelectICmpAnd(SI.getType(), ICI, *OOpC, CI, + Builder)) { + BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), + FalseVal, V); + BO->copyIRFlags(TVI); + return BO; + } + } + } } } } @@ -276,11 +393,14 @@ } if (OpToFold) { - Constant *C = getSelectFoldableConstant(FVI); + APInt CI = getSelectFoldableConstant(FVI); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. 
- if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { + const APInt *OOpC; + bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); + if (!isa<Constant>(OOp) || (OOpIsAPInt && isSelect01(CI, *OOpC))) { + Value *C = ConstantInt::get(OOp->getType(), CI); Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(), @@ -288,6 +408,20 @@ BO->copyIRFlags(FVI); return BO; } + // OOp is a constant matched by m_APInt. If the condition is a bit test, + // the selected constant may be computable directly from the tested bit. + // TODO: Not handling shifts for now. + if (OOpIsAPInt && !FVI->isShift()) { + if (auto *ICI = dyn_cast<ICmpInst>(SI.getCondition())) { + if (Value *V = foldSelectICmpAnd(SI.getType(), ICI, CI, *OOpC, + Builder)) { + BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(), + TrueVal, V); + BO->copyIRFlags(FVI); + return BO; + } + } + } } } } @@ -296,109 +430,6 @@ return nullptr; } -/// We want to turn: -/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) -/// into: -/// (or (shl (and X, C1), C3), Y) -/// iff: -/// C1 and C2 are both powers of 2 -/// where: -/// C3 = Log(C2) - Log(C1) -/// -/// This transform handles cases where: -/// 1. The icmp predicate is inverted -/// 2. The select operands are reversed -/// 3. The magnitude of C2 and C1 are flipped -static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal, - Value *FalseVal, - InstCombiner::BuilderTy &Builder) { - // Only handle integer compares. Also, if this is a vector select, we need a - // vector compare. 
- if (!TrueVal->getType()->isIntOrIntVectorTy() || - TrueVal->getType()->isVectorTy() != IC->getType()->isVectorTy()) - return nullptr; - - Value *CmpLHS = IC->getOperand(0); - Value *CmpRHS = IC->getOperand(1); - - Value *V; - unsigned C1Log; - bool IsEqualZero; - bool NeedAnd = false; - if (IC->isEquality()) { - if (!match(CmpRHS, m_Zero())) - return nullptr; - - const APInt *C1; - if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1)))) - return nullptr; - - V = CmpLHS; - C1Log = C1->logBase2(); - IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ; - } else if (IC->getPredicate() == ICmpInst::ICMP_SLT || - IC->getPredicate() == ICmpInst::ICMP_SGT) { - // We also need to recognize (icmp slt (trunc (X)), 0) and - // (icmp sgt (trunc (X)), -1). - IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT; - if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) || - (!IsEqualZero && !match(CmpRHS, m_Zero()))) - return nullptr; - - if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V))))) - return nullptr; - - C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1; - NeedAnd = true; - } else { - return nullptr; - } - - const APInt *C2; - bool OrOnTrueVal = false; - bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); - if (!OrOnFalseVal) - OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); - - if (!OrOnFalseVal && !OrOnTrueVal) - return nullptr; - - Value *Y = OrOnFalseVal ? TrueVal : FalseVal; - - unsigned C2Log = C2->logBase2(); - - bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal); - bool NeedShift = C1Log != C2Log; - bool NeedZExtTrunc = Y->getType()->getScalarSizeInBits() != - V->getType()->getScalarSizeInBits(); - - // Make sure we don't create more instructions than we save. - Value *Or = OrOnFalseVal ? FalseVal : TrueVal; - if ((NeedShift + NeedXor + NeedZExtTrunc) > - (IC->hasOneUse() + Or->hasOneUse())) - return nullptr; - - if (NeedAnd) { - // Insert the AND instruction on the input to the truncate. 
- APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log); - V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1)); - } - - if (C2Log > C1Log) { - V = Builder.CreateZExtOrTrunc(V, Y->getType()); - V = Builder.CreateShl(V, C2Log - C1Log); - } else if (C1Log > C2Log) { - V = Builder.CreateLShr(V, C1Log - C2Log); - V = Builder.CreateZExtOrTrunc(V, Y->getType()); - } else - V = Builder.CreateZExtOrTrunc(V, Y->getType()); - - if (NeedXor) - V = Builder.CreateXor(V, *C2); - - return Builder.CreateOr(V, Y); -} - /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single /// call to cttz/ctlz with flag 'is_zero_undef' cleared. /// @@ -591,111 +622,6 @@ return &Sel; } -/// If one of the constants is zero (we know they can't both be) and we have an -/// icmp instruction with zero, and we have an 'and' with the non-constant value -/// and a power of two we can turn the select into a shift on the result of the -/// 'and'. -/// This folds: -/// select (icmp eq (and X, C1)), C2, C3 -/// iff C1 is a power 2 and the difference between C2 and C3 is a power of 2. -/// To something like: -/// (shr (and (X, C1)), (log2(C1) - log2(C2-C3))) + C3 -/// Or: -/// (shl (and (X, C1)), (log2(C2-C3) - log2(C1))) + C3 -/// With some variations depending if C3 is larger than C2, or the shift -/// isn't needed, or the bit widths don't match. -static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC, - APInt TrueVal, APInt FalseVal, - InstCombiner::BuilderTy &Builder) { - assert(SelType->isIntOrIntVectorTy() && "Not an integer select?"); - - // If this is a vector select, we need a vector compare. 
- if (SelType->isVectorTy() != IC->getType()->isVectorTy()) - return nullptr; - - Value *V; - APInt AndMask; - bool CreateAnd = false; - ICmpInst::Predicate Pred = IC->getPredicate(); - if (ICmpInst::isEquality(Pred)) { - if (!match(IC->getOperand(1), m_Zero())) - return nullptr; - - V = IC->getOperand(0); - - const APInt *AndRHS; - if (!match(V, m_And(m_Value(), m_Power2(AndRHS)))) - return nullptr; - - AndMask = *AndRHS; - } else if (decomposeBitTestICmp(IC->getOperand(0), IC->getOperand(1), - Pred, V, AndMask)) { - assert(ICmpInst::isEquality(Pred) && "Not equality test?"); - - if (!AndMask.isPowerOf2()) - return nullptr; - - CreateAnd = true; - } else { - return nullptr; - } - - // If both select arms are non-zero see if we have a select of the form - // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic - // for 'x ? 2^n : 0' and fix the thing up at the end. - APInt Offset(TrueVal.getBitWidth(), 0); - if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) { - if ((TrueVal - FalseVal).isPowerOf2()) - Offset = FalseVal; - else if ((FalseVal - TrueVal).isPowerOf2()) - Offset = TrueVal; - else - return nullptr; - - // Adjust TrueVal and FalseVal to the offset. - TrueVal -= Offset; - FalseVal -= Offset; - } - - // Make sure one of the select arms is a power of 2. - if (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()) - return nullptr; - - // Determine which shift is needed to transform result of the 'and' into the - // desired result. - const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal; - unsigned ValZeros = ValC.logBase2(); - unsigned AndZeros = AndMask.logBase2(); - - if (CreateAnd) { - // Insert the AND instruction on the input to the truncate. - V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), AndMask)); - } - - // If types don't match we can still convert the select by introducing a zext - // or a trunc of the 'and'. 
- if (ValZeros > AndZeros) { - V = Builder.CreateZExtOrTrunc(V, SelType); - V = Builder.CreateShl(V, ValZeros - AndZeros); - } else if (ValZeros < AndZeros) { - V = Builder.CreateLShr(V, AndZeros - ValZeros); - V = Builder.CreateZExtOrTrunc(V, SelType); - } else - V = Builder.CreateZExtOrTrunc(V, SelType); - - // Okay, now we know that everything is set up, we just don't know whether we - // have a icmp_ne or icmp_eq and whether the true or false val is the zero. - bool ShouldNotVal = !TrueVal.isNullValue(); - ShouldNotVal ^= Pred == ICmpInst::ICMP_NE; - if (ShouldNotVal) - V = Builder.CreateXor(V, ValC); - - // Apply an offset if needed. - if (!Offset.isNullValue()) - V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset)); - return V; -} - /// Visit a SelectInst that has an ICmpInst as its first operand. Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { @@ -820,9 +746,6 @@ } } - if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) - return replaceInstUsesWith(SI, V); - if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); Index: test/Transforms/InstCombine/select-with-bitwise-ops.ll =================================================================== --- test/Transforms/InstCombine/select-with-bitwise-ops.ll +++ test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -31,6 +31,20 @@ ret <2 x i32> %select } +define i32 @select_icmp_eq_and_1_0_xor_2(i32 %x, i32 %y) { +; CHECK-LABEL: @select_icmp_eq_and_1_0_xor_2( +; CHECK-NEXT: [[AND:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 1 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i32 @select_icmp_eq_and_32_0_or_8(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8( ; 
CHECK-NEXT: [[AND:%.*]] = lshr i32 %x, 2 @@ -59,6 +73,20 @@ ret <2 x i32> %select } +define i32 @select_icmp_eq_and_32_0_xor_8(i32 %x, i32 %y) { +; CHECK-LABEL: @select_icmp_eq_and_32_0_xor_8( +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 8 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 32 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 8 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i32 @select_icmp_ne_0_and_4096_or_4096(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096( ; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096 @@ -87,6 +115,20 @@ ret <2 x i32> %select } +define i32 @select_icmp_ne_0_and_4096_xor_4096(i32 %x, i32 %y) { +; CHECK-LABEL: @select_icmp_ne_0_and_4096_xor_4096( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096( ; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096 @@ -113,6 +155,19 @@ ret <2 x i32> %select } +define i32 @select_icmp_eq_and_4096_0_xor_4096(i32 %x, i32 %y) { +; CHECK-LABEL: @select_icmp_eq_and_4096_0_xor_4096( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[AND]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 4096 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1( ; CHECK-NEXT: [[X_TR:%.*]] = trunc i64 %x to i32 @@ -131,8 +186,8 
@@ ; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1> -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[Y:%.*]] -; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; %and = and <2 x i64> %x, <i64 1, i64 1> %cmp = icmp eq <2 x i64> %and, zeroinitializer @@ -141,6 +196,20 @@ ret <2 x i32> %select } +define i32 @select_icmp_eq_0_and_1_xor_1(i64 %x, i32 %y) { +; CHECK-LABEL: @select_icmp_eq_0_and_1_xor_1( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i64 %x, 1 + %cmp = icmp eq i64 %and, 0 + %xor = xor i32 %y, 1 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i32 @select_icmp_ne_0_and_4096_or_32(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_32( ; CHECK-NEXT: [[AND:%.*]] = lshr i32 %x, 7 @@ -156,6 +225,21 @@ ret i32 %select } +define i32 @select_icmp_ne_0_and_4096_xor_32(i32 %x, i32 %y) { +; CHECK-LABEL: @select_icmp_ne_0_and_4096_xor_32( +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 32 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 32 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_32_or_4096( ; CHECK-NEXT: [[AND:%.*]] = shl i32 %x, 7 @@ -186,12 +270,28 @@ ret <2 x i32> %select } +define i32 @select_icmp_ne_0_and_32_xor_4096(i32 %x, i32 %y) { +; CHECK-LABEL: @select_icmp_ne_0_and_32_xor_4096( +; CHECK-NEXT: [[AND:%.*]] = 
shl i32 [[X:%.*]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 4096 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 4096 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 32 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8( -; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 1073741824 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: [[OR:%.*]] = or i8 %y, 8 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[OR]], i8 %y +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 27 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AND]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[TMP2]], 8 +; CHECK-NEXT: [[SELECT:%.*]] = or i8 [[TMP3]], [[Y:%.*]] ; CHECK-NEXT: ret i8 [[SELECT]] ; %and = and i32 %x, 1073741824 @@ -201,12 +301,29 @@ ret i8 %select } +define i8 @select_icmp_ne_0_and_1073741824_xor_8(i32 %x, i8 %y) { +; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_xor_8( +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 27 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AND]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[TMP2]], 8 +; CHECK-NEXT: [[SELECT:%.*]] = xor i8 [[TMP3]], [[Y:%.*]] +; CHECK-NEXT: ret i8 [[SELECT]] +; + %and = and i32 %x, 1073741824 + %cmp = icmp ne i32 0, %and + %xor = xor i8 %y, 8 + %select = select i1 %cmp, i8 %y, i8 %xor + ret i8 %select +} + define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_8_or_1073741824( -; CHECK-NEXT: [[AND:%.*]] = and i8 %x, 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], 0 -; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 1073741824 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 %y +; CHECK-NEXT: [[AND:%.*]] = 
and i8 [[X:%.*]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[AND]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 27 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 1073741824 +; CHECK-NEXT: [[SELECT:%.*]] = or i32 [[TMP3]], [[Y:%.*]] ; CHECK-NEXT: ret i32 [[SELECT]] ; %and = and i8 %x, 8 @@ -216,6 +333,22 @@ ret i32 %select } +define i32 @select_icmp_ne_0_and_8_xor_1073741824(i8 %x, i32 %y) { +; CHECK-LABEL: @select_icmp_ne_0_and_8_xor_1073741824( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[AND]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 27 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 1073741824 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP3]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i8 %x, 8 + %cmp = icmp ne i8 0, %and + %xor = xor i32 %y, 1073741824 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + ; We can't combine here, because the cmp is scalar and the or vector. ; Just make sure we don't assert. 
define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) { @@ -259,10 +392,9 @@ define i64 @select_icmp_x_and_8_eq_0_y_xor_8(i32 %x, i64 %y) { ; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_xor_8( -; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: [[XOR:%.*]] = xor i64 %y, 8 -; CHECK-NEXT: [[Y_XOR:%.*]] = select i1 [[CMP]], i64 %y, i64 [[XOR]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[AND]] to i64 +; CHECK-NEXT: [[Y_XOR:%.*]] = xor i64 [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret i64 [[Y_XOR]] ; %and = and i32 %x, 8 @@ -274,10 +406,10 @@ define i64 @select_icmp_x_and_8_ne_0_y_xor_8(i32 %x, i64 %y) { ; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_xor_8( -; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: [[XOR:%.*]] = xor i64 %y, 8 -; CHECK-NEXT: [[XOR_Y:%.*]] = select i1 [[CMP]], i64 [[XOR]], i64 %y +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[XOR_Y:%.*]] = xor i64 [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret i64 [[XOR_Y]] ; %and = and i32 %x, 8 @@ -479,6 +611,20 @@ ret <2 x i32> %select } +define i32 @test68_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @test68_xor( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 128 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i32 @test69(i32 %x, i32 %y) { ; CHECK-LABEL: @test69( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 @@ -509,12 +655,26 @@ ret <2 x i32> %select } -; TODO: we should be able to remove this select +define i32 @test69_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @test69_xor( +; 
CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 2 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP3]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 128 + %cmp = icmp ne i32 %and, 0 + %xor = xor i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %xor + ret i32 %select +} + define i8 @test70(i8 %x, i8 %y) { ; CHECK-LABEL: @test70( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], 2 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[OR]], i8 [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[X:%.*]], 6 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 2 +; CHECK-NEXT: [[SELECT:%.*]] = or i8 [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret i8 [[SELECT]] ; %cmp = icmp slt i8 %x, 0 @@ -627,11 +787,11 @@ define i32 @shift_no_xor_multiuse_or(i32 %x, i32 %y) { ; CHECK-LABEL: @shift_no_xor_multiuse_or( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2 -; CHECK-NEXT: [[AND:%.*]] = shl i32 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]] -; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[OR]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]] ; CHECK-NEXT: ret i32 [[RES]] ; %and = and i32 %x, 1 @@ -642,12 +802,30 @@ ret i32 %res } +define i32 @shift_no_xor_multiuse_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @shift_no_xor_multiuse_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[XOR]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 1 + %cmp = icmp eq i32 %and, 
0 + %xor = xor i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %xor + %res = mul i32 %select, %xor ; to bump up use count of the Xor + ret i32 %res +} + define i32 @no_shift_no_xor_multiuse_or(i32 %x, i32 %y) { ; CHECK-LABEL: @no_shift_no_xor_multiuse_or( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096 -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y]] -; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[OR]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]] ; CHECK-NEXT: ret i32 [[RES]] ; %and = and i32 %x, 4096 @@ -658,13 +836,30 @@ ret i32 %res } +define i32 @no_shift_no_xor_multiuse_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @no_shift_no_xor_multiuse_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[XOR]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + %res = mul i32 %select, %xor ; to bump up use count of the Xor + ret i32 %res +} + define i32 @no_shift_xor_multiuse_or(i32 %x, i32 %y) { ; CHECK-LABEL: @no_shift_xor_multiuse_or( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]] -; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[OR]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 [[Y]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]] ; CHECK-NEXT: ret i32 [[RES]] ; %and = and i32 %x, 4096 @@ -675,6 +870,23 @@ 
ret i32 %res } +define i32 @no_shift_xor_multiuse_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @no_shift_xor_multiuse_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[Y]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[XOR]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + %res = mul i32 %select, %xor ; to bump up use count of the Xor + ret i32 %res +} + define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) { ; CHECK-LABEL: @shift_xor_multiuse_or( ; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096 @@ -692,6 +904,23 @@ ret i32 %res } +define i32 @shift_xor_multiuse_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @shift_xor_multiuse_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2048 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[Y]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[XOR]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 2048 + %select = select i1 %cmp, i32 %y, i32 %xor + %res = mul i32 %select, %xor ; to bump up use count of the Xor + ret i32 %res +} + define i32 @shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @shift_no_xor_multiuse_cmp( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 @@ -711,6 +940,25 @@ ret i32 %res } +define i32 @shift_no_xor_multiuse_cmp_with_xor(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @shift_no_xor_multiuse_cmp_with_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[AND]], 1 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP1]], 
[[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 1 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %xor + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + ret i32 %res +} + define i32 @no_shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 @@ -729,6 +977,24 @@ ret i32 %res } +define i32 @no_shift_no_xor_multiuse_cmp_with_xor(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_with_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[AND]], [[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + ret i32 %res +} + define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @no_shift_xor_multiuse_cmp( ; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096 @@ -748,13 +1014,33 @@ ret i32 %res } +define i32 @no_shift_xor_multiuse_cmp_with_xor(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @no_shift_xor_multiuse_cmp_with_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 
[[W:%.*]], i32 [[Z:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + ret i32 %res +} + define i32 @shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @shift_xor_multiuse_cmp( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]] -; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048 +; CHECK-NEXT: [[SELECT:%.*]] = or i32 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[W:%.*]], i32 [[Z:%.*]] ; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] ; CHECK-NEXT: ret i32 [[RES]] ; @@ -767,6 +1053,26 @@ ret i32 %res } +define i32 @shift_xor_multiuse_cmp_with_xor(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @shift_xor_multiuse_cmp_with_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048 +; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[W:%.*]], i32 [[Z:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 2048 + %select = select i1 %cmp, i32 %y, i32 %xor + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count 
of the cmp + %res = mul i32 %select, %select2 + ret i32 %res +} + define i32 @shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @shift_no_xor_multiuse_cmp_or( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 @@ -788,14 +1094,35 @@ ret i32 %res2 } +define i32 @shift_no_xor_multiuse_cmp_xor(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @shift_no_xor_multiuse_cmp_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] +; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[XOR]] +; CHECK-NEXT: ret i32 [[RES2]] +; + %and = and i32 %x, 1 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %xor + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + %res2 = mul i32 %res, %xor ; to bump up the use count of the xor + ret i32 %res2 +} + define i32 @no_shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_or( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096 -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]] ; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] -; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] ; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]] ; CHECK-NEXT: ret i32 [[RES2]] ; @@ -809,6 +1136,27 @@ ret i32 %res2 } +define i32 @no_shift_no_xor_multiuse_cmp_xor(i32 %x, i32 %y, i32 %z, i32 %w) { +; 
CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] +; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[XOR]] +; CHECK-NEXT: ret i32 [[RES2]] +; + %and = and i32 %x, 4096 + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + %res2 = mul i32 %res, %xor ; to bump up the use count of the xor + ret i32 %res2 +} + define i32 @no_shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @no_shift_xor_multiuse_cmp_or( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 @@ -830,6 +1178,27 @@ ret i32 %res2 } +define i32 @no_shift_xor_multiuse_cmp_xor(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @no_shift_xor_multiuse_cmp_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] +; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[XOR]] +; CHECK-NEXT: ret i32 [[RES2]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %xor + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + %res2 = mul i32 %res, %xor ; to bump up the use count of the xor + ret i32 %res2 +} + define i32 
@shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @shift_xor_multiuse_cmp_or( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 @@ -850,3 +1219,24 @@ %res2 = mul i32 %res, %or ; to bump up the use count of the or ret i32 %res2 } + +define i32 @shift_xor_multiuse_cmp_xor(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @shift_xor_multiuse_cmp_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2048 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]] +; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[XOR]] +; CHECK-NEXT: ret i32 [[RES2]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %xor = xor i32 %y, 2048 + %select = select i1 %cmp, i32 %y, i32 %xor + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + %res2 = mul i32 %res, %xor ; to bump up the use count of the xor + ret i32 %res2 +}