Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1589,10 +1589,29 @@ return LastInst; } +/// If all elements of two constant vectors are 0/-1 and inverses, return true. +static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) { + unsigned NumElts = C1->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *EltC1 = C1->getAggregateElement(i); + Constant *EltC2 = C2->getAggregateElement(i); + if (!EltC1 || !EltC2) + return false; + + // One element must be all ones, and the other must be all zeros. + // FIXME: Allow undef elements. + if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) || + (match(EltC2, m_Zero()) && match(EltC1, m_AllOnes())))) + return false; + } + return true; +} + /// We have an expression of the form (A & C) | (B & D). If A is a scalar or /// vector composed of all-zeros or all-ones values and is the bitwise 'not' of /// B, it can be used as the condition operand of a select instruction. -static Value *getSelectCondition(Value *A, Value *B) { +static Value *getSelectCondition(Value *A, Value *B, + InstCombiner::BuilderTy &Builder) { // If these are scalars or vectors of i1, A can be used directly. Type *Ty = A->getType(); if (match(A, m_Not(m_Specific(B))) && Ty->getScalarType()->isIntegerTy(1)) @@ -1606,8 +1625,26 @@ m_SExt(m_Not(m_Specific(Cond)))))) return Cond; - // TODO: Try more matches that only apply to non-splat constant vectors. + // All scalar (and most vector) possibilities should be handled now. + // Try more matches that only apply to non-splat constant vectors. + if (!Ty->isVectorTy()) + return nullptr; + // If both operands are constants, see if the constants are inverse bitmasks. 
+ Constant *AC, *BC; + if (match(A, m_Constant(AC)) && match(B, m_Constant(BC)) && + areInverseVectorBitmasks(AC, BC)) + return ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty)); + + // If both operands are xor'd with constants using the same sexted boolean + // operand, see if the constants are inverse bitmasks. + if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) && + match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) && + Cond->getType()->getScalarType()->isIntegerTy(1) && + areInverseVectorBitmasks(AC, BC)) { + AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty)); + return Builder.CreateXor(Cond, AC); + } return nullptr; } @@ -1625,7 +1662,7 @@ B = SrcB; } - if (Value *Cond = getSelectCondition(A, B)) { + if (Value *Cond = getSelectCondition(A, B, Builder)) { // ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D)) // The bitcasts will either all exist or all not exist. The builder will // not create unnecessary casts if the types already match. Index: llvm/trunk/test/Transforms/InstCombine/logical-select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/logical-select.ll +++ llvm/trunk/test/Transforms/InstCombine/logical-select.ll @@ -366,15 +366,12 @@ ret i4 %or } -; FIXME: Missed conversions to select below here. ; Inverted 'and' constants mean this is a select. 
define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @vec_sel_consts( -; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1> -; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]] -; CHECK-NEXT: ret <4 x i32> [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b +; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1> %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0> @@ -386,10 +383,8 @@ define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) { ; CHECK-LABEL: @vec_sel_consts_weird( -; CHECK-NEXT: [[AND1:%.*]] = and <3 x i129> %a, <i129 -1, i129 0, i129 -1> -; CHECK-NEXT: [[AND2:%.*]] = and <3 x i129> %b, <i129 0, i129 -1, i129 0> -; CHECK-NEXT: [[OR:%.*]] = or <3 x i129> [[AND2]], [[AND1]] -; CHECK-NEXT: ret <3 x i129> [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> <i1 false, i1 true, i1 false>, <3 x i129> %b, <3 x i129> %a +; CHECK-NEXT: ret <3 x i129> [[TMP1]] ; %and1 = and <3 x i129> %a, <i129 -1, i129 0, i129 -1> %and2 = and <3 x i129> %b, <i129 0, i129 -1, i129 0> @@ -416,13 +411,30 @@ define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) { ; CHECK-LABEL: @vec_sel_xor( +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> %c, <i1 false, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> %a, <4 x i32> %b +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; + %mask = sext <4 x i1> %c to <4 x i32> + %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0> + %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1> + %and1 = and <4 x i32> %not_mask_flip1, %a + %and2 = and <4 x i32> %mask_flip1, %b + %or = or <4 x i32> %and1, %and2 + ret <4 x i32> %or +} + +; Allow the transform even if the mask values have multiple uses because +; there's still a net reduction of instructions from removing the and/and/or. 
+ +define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) { +; CHECK-LABEL: @vec_sel_xor_multi_use( ; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> %c to <4 x i32> ; CHECK-NEXT: [[MASK_FLIP1:%.*]] = xor <4 x i32> [[MASK]], <i32 -1, i32 0, i32 0, i32 0> -; CHECK-NEXT: [[NOT_MASK_FLIP1:%.*]] = xor <4 x i32> [[MASK]], <i32 0, i32 -1, i32 -1, i32 -1> -; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[NOT_MASK_FLIP1]], %a -; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[MASK_FLIP1]], %b -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]] -; CHECK-NEXT: ret <4 x i32> [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> %c, <i1 false, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> %a, <4 x i32> %b +; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[TMP2]], [[MASK_FLIP1]] +; CHECK-NEXT: ret <4 x i32> [[ADD]] ; %mask = sext <4 x i1> %c to <4 x i32> %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0> @@ -430,6 +442,7 @@ %and1 = and <4 x i32> %not_mask_flip1, %a %and2 = and <4 x i32> %mask_flip1, %b %or = or <4 x i32> %and1, %and2 - ret <4 x i32> %or + %add = add <4 x i32> %or, %mask_flip1 + ret <4 x i32> %add }