Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2418,6 +2418,28 @@ return nullptr; } +/// Fold a masked merge whose mask is inverted. Matches, in any operand order: +/// | A | |B| +/// ((x ^ y) & ~M) ^ y +/// | D | +/// If A has one use, the 'not' is dropped by swapping the final xor operand: +/// | D | +/// ((x ^ y) & M) ^ x +static Instruction *invertMaskInMaskedMerge(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + Value *B, *X, *D, *M; + if (!match(&I, m_c_Xor(m_Value(B), + m_OneUse(m_c_And( + m_CombineAnd(m_c_Xor(m_c_Specific(&B), m_Value(X)), + m_Value(D)), + m_Not(m_Value(M))))))) + return nullptr; + + // De-invert the mask and xor with X (the other operand of D) instead of B. + Value *NewA = Builder.CreateAnd(D, M); + return BinaryOperator::CreateXor(NewA, X); +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -2468,6 +2490,9 @@ return BinaryOperator::CreateAnd(X, NotY); } + if (Instruction *Xor = invertMaskInMaskedMerge(I, Builder)) + return Xor; + // Is this a 'not' (~) fed by a binary operator? 
BinaryOperator *NotVal; if (match(&I, m_Not(m_BinOp(NotVal)))) { Index: test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll =================================================================== --- test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll +++ test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll @@ -8,10 +8,9 @@ define i4 @scalar (i4 %x, i4 %y, i4 %m) { ; CHECK-LABEL: @scalar( -; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %im = xor i4 %m, -1 @@ -32,10 +31,9 @@ define i4 @c_1_0_0 (i4 %x, i4 %y, i4 %m) { ; CHECK-LABEL: @c_1_0_0( -; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %im = xor i4 %m, -1 @@ -47,10 +45,9 @@ define i4 @c_0_1_0 (i4 %x, i4 %y, i4 %m) { ; CHECK-LABEL: @c_0_1_0( -; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i4 [[R]] ; %im = xor i4 %m, -1 @@ -62,12 +59,11 @@ define i4 @c_0_0_1 (i4 %m) { ; CHECK-LABEL: @c_0_0_1( -; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], 
[[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %im = xor i4 %m, -1 @@ -81,10 +77,9 @@ define i4 @c_1_1_0 (i4 %x, i4 %y, i4 %m) { ; CHECK-LABEL: @c_1_1_0( -; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i4 [[R]] ; %im = xor i4 %m, -1 @@ -96,11 +91,10 @@ define i4 @c_1_0_1 (i4 %x, i4 %m) { ; CHECK-LABEL: @c_1_0_1( -; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %im = xor i4 %m, -1 @@ -113,11 +107,10 @@ define i4 @c_0_1_1 (i4 %y, i4 %m) { ; CHECK-LABEL: @c_0_1_1( -; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i4 [[R]] ; %im = xor i4 %m, -1 @@ -130,12 +123,11 @@ define i4 @c_1_1_1 (i4 %m) { ; CHECK-LABEL: @c_1_1_1( -; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 
[[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i4 [[R]] ; %im = xor i4 %m, -1 Index: test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll =================================================================== --- test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll +++ test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll @@ -8,10 +8,9 @@ define <2 x i4> @vector (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @vector( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %im = xor <2 x i4> %m, @@ -23,10 +22,9 @@ define <3 x i4> @vector_undef (<3 x i4> %x, <3 x i4> %y, <3 x i4> %m) { ; CHECK-LABEL: @vector_undef( -; CHECK-NEXT: [[IM:%.*]] = xor <3 x i4> [[M:%.*]], ; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <3 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i4> [[R]] ; %im = xor <3 x i4> %m, @@ -47,10 +45,9 @@ define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @c_1_0_0( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %im = xor <2 x i4> %m, @@ -62,10 +59,9 @@ define <2 x i4> 
@c_0_1_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @c_0_1_0( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %im = xor <2 x i4> %m, @@ -77,12 +73,11 @@ define <2 x i4> @c_0_0_1 (<2 x i4> %m) { ; CHECK-LABEL: @c_0_0_1( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %im = xor <2 x i4> %m, @@ -96,10 +91,9 @@ define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @c_1_1_0( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %im = xor <2 x i4> %m, @@ -111,11 +105,10 @@ define <2 x i4> @c_1_0_1 (<2 x i4> %x, <2 x i4> %m) { ; CHECK-LABEL: @c_1_0_1( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: 
[[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %im = xor <2 x i4> %m, @@ -128,11 +121,10 @@ define <2 x i4> @c_0_1_1 (<2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @c_0_1_1( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %im = xor <2 x i4> %m, @@ -145,12 +137,11 @@ define <2 x i4> @c_1_1_1 (<2 x i4> %m) { ; CHECK-LABEL: @c_1_1_1( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %im = xor <2 x i4> %m, Index: test/Transforms/InstCombine/masked-merge-with-xors.ll =================================================================== --- test/Transforms/InstCombine/masked-merge-with-xors.ll +++ test/Transforms/InstCombine/masked-merge-with-xors.ll @@ -5,10 +5,9 @@ define i32 @f_0(i32 %x, i32 %y, i32 %m) { ; CHECK-LABEL: @f_0( -; CHECK-NEXT: [[IM:%.*]] = xor i32 [[M:%.*]], -1 ; CHECK-NEXT: [[N0:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i32 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i32 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP1]], [[X]] ; CHECK-NEXT: ret i32 [[R]] ; %im = xor i32 %m, -1 @@ -21,10 +20,9 @@ define i32 @f_1(i32 %x, i32 %y, i32 %m) { ; CHECK-LABEL: @f_1( ; 
CHECK-NEXT: [[IX:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[IM:%.*]] = xor i32 [[M:%.*]], -1 ; CHECK-NEXT: [[N0:%.*]] = xor i32 [[IX]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i32 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i32 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP1]], [[IX]] ; CHECK-NEXT: ret i32 [[R]] ; %ix = xor i32 %x, -1 @@ -43,10 +41,9 @@ define i32 @f_2(i32 %x, i32 %y, i32 %m) { ; CHECK-LABEL: @f_2( ; CHECK-NEXT: [[IY:%.*]] = xor i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[IM:%.*]] = xor i32 [[M:%.*]], -1 ; CHECK-NEXT: [[N0:%.*]] = xor i32 [[IY]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i32 [[N0]], [[IM]] -; CHECK-NEXT: [[R:%.*]] = xor i32 [[N1]], [[IY]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[N0]], [[M:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP1]], [[X]] ; CHECK-NEXT: ret i32 [[R]] ; %iy = xor i32 %y, -1