Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2412,14 +2412,19 @@ } /// If we have a masked merge, in the canonical form of: +/// (assuming that A only has one use.) /// | A | |B| /// ((x ^ y) & M) ^ y /// | D | /// * If M is inverted: /// | D | /// ((x ^ y) & ~M) ^ y -/// If A has one use, and, we want to canonicalize it to non-inverted mask: +/// We can canonicalize by swapping the final xor operand +/// to eliminate the 'not' of the mask. /// ((x ^ y) & M) ^ x +/// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops +/// because that shortens the dependency chain and improves analysis: +/// (x & M) | (y & ~M) static Instruction *visitMaskedMerge(BinaryOperator &I, InstCombiner::BuilderTy &Builder) { Value *B, *X, *D; @@ -2438,6 +2443,15 @@ return BinaryOperator::CreateXor(NewA, X); } + Constant *C; + if (D->hasOneUse() && match(M, m_Constant(C))) { + // Unfold. + Value *LHS = Builder.CreateAnd(X, C); + Value *NotC = Builder.CreateNot(C); + Value *RHS = Builder.CreateAnd(B, NotC); + return BinaryOperator::CreateOr(LHS, RHS); + } + return nullptr; } Index: llvm/trunk/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-scalar.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-scalar.ll +++ llvm/trunk/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-scalar.ll @@ -8,9 +8,9 @@ define i4 @scalar0 (i4 %x, i4 %y) { ; CHECK-LABEL: @scalar0( -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], 1 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y:%.*]], -2 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -21,9 +21,9 @@ define i4 @scalar1 (i4 %x, i4 %y) { ; CHECK-LABEL: @scalar1( -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -91,9 +91,9 @@ define i4 @c_1_0_0 (i4 %x, i4 %y) { ; CHECK-LABEL: @c_1_0_0( -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %y, %x ; swapped order @@ -104,9 +104,9 @@ define i4 @c_0_1_0 (i4 %x, i4 %y) { ; CHECK-LABEL: @c_0_1_0( -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[Y:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[X:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -119,9 +119,9 @@ ; CHECK-LABEL: @c_0_0_1( ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y]], 1 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %x = call i4 @gen4() @@ -134,9 +134,9 @@ define i4 @c_1_1_0 (i4 %x, i4 %y) { ; CHECK-LABEL: @c_1_1_0( -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[Y:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[X:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %y, %x ; swapped order @@ -148,9 +148,9 @@ define i4 @c_1_0_1 (i4 %x) { ; CHECK-LABEL: @c_1_0_1( ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y]], 1 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %y = call i4 @gen4() @@ -163,9 +163,9 @@ define i4 @c_0_1_1 (i4 %y) { ; CHECK-LABEL: @c_0_1_1( ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[Y:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[X]], 1 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %x = call i4 @gen4() @@ -179,9 +179,9 @@ ; CHECK-LABEL: @c_1_1_1( ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() -; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[Y]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[X]], 1 +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %x = call i4 @gen4() Index: llvm/trunk/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll +++ llvm/trunk/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll @@ -8,9 +8,9 @@ define <2 x i4> @splat (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @splat( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %x, %y @@ -21,9 +21,9 @@ define <3 x i4> @splat_undef (<3 x i4> %x, <3 x i4> %y) { ; CHECK-LABEL: @splat_undef( -; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <3 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <3 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <3 x i4> [[R]] ; %n0 = xor <3 x i4> %x, %y @@ -34,9 +34,9 @@ define <2 x i4> @nonsplat (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @nonsplat( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %x, %y @@ -73,9 +73,8 @@ define <2 x i4> @in_constant_varx_14_nonsplat(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_14_nonsplat( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %x, ; %x @@ -86,9 +85,8 @@ define <3 x i4> @in_constant_varx_14_undef(<3 x i4> %x, <3 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_14_undef( -; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], -; CHECK-NEXT: [[N1:%.*]] = and <3 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[N1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <3 x i4> [[TMP1]], ; CHECK-NEXT: ret <3 x i4> [[R]] ; %n0 = xor <3 x i4> %x, ; %x @@ -123,9 +121,8 @@ define <2 x i4> @in_constant_14_vary_nonsplat(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_14_vary_nonsplat( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, ; %x @@ -136,9 +133,8 @@ define <3 x i4> @in_constant_14_vary_undef(<3 x i4> %y, <3 x i4> %mask) { ; CHECK-LABEL: @in_constant_14_vary_undef( -; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[Y:%.*]], -; CHECK-NEXT: [[N1:%.*]] = and <3 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <3 x i4> [[TMP1]], ; CHECK-NEXT: ret <3 x i4> [[R]] ; %n0 = xor <3 x i4> %y, ; %x @@ -156,9 +152,9 @@ define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_1_0_0( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, %x ; swapped order @@ -169,9 +165,9 @@ define <2 x i4> @c_0_1_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_0_1_0( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %x, %y @@ -184,9 +180,9 @@ ; CHECK-LABEL: @c_0_0_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %x = call <2 x i4> @gen4() @@ -199,9 +195,9 @@ define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_1_1_0( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, %x ; swapped order @@ -213,9 +209,9 @@ define <2 x i4> @c_1_0_1 (<2 x i4> %x) { ; CHECK-LABEL: @c_1_0_1( ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %y = call <2 x i4> @gen4() @@ -228,9 +224,9 @@ define <2 x i4> @c_0_1_1 (<2 x i4> %y) { ; CHECK-LABEL: @c_0_1_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %x = call <2 x i4> @gen4() @@ -244,9 +240,9 @@ ; CHECK-LABEL: @c_1_1_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %x = call <2 x i4> @gen4()