Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2418,23 +2418,91 @@
   return nullptr;
 }
 
+/// This is cst_pred_ty, but with bias - if more than half of elements matched,
+/// then the result is true. undef is skipped, not counted as a match.
+template <typename Predicate> struct _cst_biased_pred_ty : public Predicate {
+  template <typename ITy> bool match(ITy *V) {
+    if (const auto *CI = dyn_cast<ConstantInt>(V)) // Scalar integer constant.
+      return this->isValue(CI->getValue());
+    if (V->getType()->isVectorTy()) {
+      if (const auto *C = dyn_cast<Constant>(V)) {
+        if (const auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
+          return this->isValue(CI->getValue());
+
+        // Non-splat vector constant: check each element for a match.
+        unsigned NumElts = V->getType()->getVectorNumElements();
+        assert(NumElts != 0 && "Constant vector with no elements?");
+        unsigned matched = 0;
+        for (unsigned i = 0; i != NumElts; ++i) {
+          Constant *Elt = C->getAggregateElement(i);
+          if (!Elt)
+            return false;
+          if (isa<UndefValue>(Elt))
+            continue; // Do *NOT* increase 'matched'!
+          auto *CI = dyn_cast<ConstantInt>(Elt);
+          if (!CI)
+            return false; // Any non-integer, non-undef element: no match.
+          if (this->isValue(CI->getValue()))
+            ++matched;
+        }
+        const unsigned notMatched = NumElts - matched; // Incl. undef elts.
+        return matched > notMatched; // Strict majority; a tie is no match.
+      }
+    }
+    return false;
+  }
+};
+
+/// Never invert if it will increase count of set bits.
+/// If it decreases the count of set bits - do invert.
+/// Else, if it decreases the zero-extended value of the mask - do invert.
+/// NOTE: We must use </>, not <=/>= to prevent [un]doing our own changes.
+static bool isNonCanonicalMask(const APInt &m) {
+  const unsigned setBits = m.countPopulation();
+  const unsigned unsetBits = m.getBitWidth() - setBits;
+  if (setBits != unsetBits)
+    return setBits > unsetBits;
+
+  assert(setBits == unsetBits);
+  return (~m).ult(m); // Unlike getZExtValue(), valid for widths > 64.
+}
+
+struct is_noncanonical_mask {
+  bool isValue(const APInt &C) { return isNonCanonicalMask(C); }
+};
+inline _cst_biased_pred_ty<is_noncanonical_mask> _NonCanonicalMask() {
+  return _cst_biased_pred_ty<is_noncanonical_mask>();
+}
+
 /// If we have a masked merge, in the form of:
 ///   |        A  |  |B|
 ///   ((x ^ y) & M) ^ y
 ///    |  D  |
-/// If A has one use, and M is inverted, we can convert it into:
+/// If A has one use, and M is inverted, or is a non-canonical constant,
+/// we can convert it into:
 ///    |  D  |
 ///   ((x ^ y) & ~M) ^ x
 static Instruction *invertMaskInMaskedMerge(BinaryOperator &I,
                                             InstCombiner::BuilderTy &Builder) {
-  Value *D, *M, *B, *X, *Y;
-  if (!(match(&I, m_c_Xor(m_OneUse(m_And(m_Value(D), m_Not(m_Value(M)))),
-                          m_Value(B))) &&
+  Value *D, *B, *X, *Y;
+  Value *M = nullptr;
+  Constant *Mc = nullptr; // Stays null when the m_Not alternative matches.
+  if (!(match(&I,
+              m_c_Xor(m_OneUse(m_And(m_Value(D), m_CombineOr(m_Not(m_Value(M)),
+                                                             m_Constant(Mc)))),
+                      m_Value(B))) &&
+        ((Mc && match(Mc, _NonCanonicalMask())) || !Mc) &&
         match(D, m_Xor(m_Value(X), m_Value(Y))) &&
         match(B, m_CombineOr(m_Specific(X), m_Specific(Y)))))
     return nullptr;
 
-  // De-invert the mask and swap the value in B part.
+  // We either have the inverted non-constant mask, or non-canonical constant.
+  assert((!M && Mc) || (M && !Mc));
+  // If we have non-canonical constant mask, invert it. Else just use the mask.
+  if (!M)
+    M = Builder.CreateNot(Mc);
+
+  // Use canonical mask and swap the value in B part.
   Value *NewA = Builder.CreateAnd(D, M);
   Value *NewB = (B == Y) ? 
X : Y; return BinaryOperator::CreateXor(NewA, NewB); Index: test/Transforms/InstCombine/invert-const-mask-in-masked-merge-scalar.ll =================================================================== --- test/Transforms/InstCombine/invert-const-mask-in-masked-merge-scalar.ll +++ test/Transforms/InstCombine/invert-const-mask-in-masked-merge-scalar.ll @@ -41,8 +41,8 @@ define i3 @t_3_3 (i3 %x, i3 %y) { ; CHECK-LABEL: @t_3_3( ; CHECK-NEXT: [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i3 [[N0]], 3 -; CHECK-NEXT: [[R:%.*]] = xor i3 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i3 [[N0]], -4 +; CHECK-NEXT: [[R:%.*]] = xor i3 [[TMP1]], [[X]] ; CHECK-NEXT: ret i3 [[R]] ; %n0 = xor i3 %x, %y @@ -67,8 +67,8 @@ define i3 @t_3_5 (i3 %x, i3 %y) { ; CHECK-LABEL: @t_3_5( ; CHECK-NEXT: [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i3 [[N0]], -3 -; CHECK-NEXT: [[R:%.*]] = xor i3 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i3 [[N0]], 2 +; CHECK-NEXT: [[R:%.*]] = xor i3 [[TMP1]], [[X]] ; CHECK-NEXT: ret i3 [[R]] ; %n0 = xor i3 %x, %y @@ -80,8 +80,8 @@ define i3 @t_3_6 (i3 %x, i3 %y) { ; CHECK-LABEL: @t_3_6( ; CHECK-NEXT: [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i3 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i3 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i3 [[N0]], 1 +; CHECK-NEXT: [[R:%.*]] = xor i3 [[TMP1]], [[X]] ; CHECK-NEXT: ret i3 [[R]] ; %n0 = xor i3 %x, %y @@ -171,8 +171,8 @@ define i4 @t_4_7 (i4 %x, i4 %y) { ; CHECK-LABEL: @t_4_7( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], 7 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], -8 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -197,8 +197,8 @@ define i4 @t_4_9 (i4 %x, i4 %y) { ; CHECK-LABEL: @t_4_9( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and 
i4 [[N0]], -7 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 6 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -210,8 +210,8 @@ define i4 @t_4_10 (i4 %x, i4 %y) { ; CHECK-LABEL: @t_4_10( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -6 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 5 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -223,8 +223,8 @@ define i4 @t_4_11 (i4 %x, i4 %y) { ; CHECK-LABEL: @t_4_11( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -5 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 4 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -236,8 +236,8 @@ define i4 @t_4_12 (i4 %x, i4 %y) { ; CHECK-LABEL: @t_4_12( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -4 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 3 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -249,8 +249,8 @@ define i4 @t_4_13 (i4 %x, i4 %y) { ; CHECK-LABEL: @t_4_13( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -3 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 2 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -262,8 +262,8 @@ define i4 @t_4_14 (i4 %x, i4 %y) { ; CHECK-LABEL: @t_4_14( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 1 +; CHECK-NEXT: 
[[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -282,8 +282,8 @@ define i4 @c_1_0_0 (i4 %x, i4 %y) { ; CHECK-LABEL: @c_1_0_0( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 1 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %y, %x ; swapped order @@ -295,8 +295,8 @@ define i4 @c_0_1_0 (i4 %x, i4 %y) { ; CHECK-LABEL: @c_0_1_0( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 1 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %x, %y @@ -310,8 +310,8 @@ ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 1 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %x = call i4 @gen4() @@ -325,8 +325,8 @@ define i4 @c_1_1_0 (i4 %x, i4 %y) { ; CHECK-LABEL: @c_1_1_0( ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 1 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i4 [[R]] ; %n0 = xor i4 %y, %x ; swapped order @@ -339,8 +339,8 @@ ; CHECK-LABEL: @c_1_0_1( ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 1 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]] ; CHECK-NEXT: ret i4 [[R]] ; %y = 
call i4 @gen4() @@ -354,8 +354,8 @@ ; CHECK-LABEL: @c_0_1_1( ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 1 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i4 [[R]] ; %x = call i4 @gen4() @@ -370,8 +370,8 @@ ; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X]] -; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2 -; CHECK-NEXT: [[R:%.*]] = xor i4 [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], 1 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i4 [[R]] ; %x = call i4 @gen4() Index: test/Transforms/InstCombine/invert-const-mask-in-masked-merge-vector.ll =================================================================== --- test/Transforms/InstCombine/invert-const-mask-in-masked-merge-vector.ll +++ test/Transforms/InstCombine/invert-const-mask-in-masked-merge-vector.ll @@ -15,8 +15,8 @@ define <2 x i4> @splat (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @splat( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %x, %y @@ -28,8 +28,8 @@ define <3 x i4> @splat_undef (<3 x i4> %x, <3 x i4> %y) { ; CHECK-LABEL: @splat_undef( ; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <3 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i4> [[R]] ; %n0 = xor <3 x i4> %x, %y @@ -41,8 +41,8 @@ define <3 x i4> @nonsplat (<3 x 
i4> %x, <3 x i4> %y) { ; CHECK-LABEL: @nonsplat( ; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <3 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i4> [[R]] ; %n0 = xor <3 x i4> %x, %y @@ -61,8 +61,8 @@ define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_1_0_0( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, %x ; swapped order @@ -74,8 +74,8 @@ define <2 x i4> @c_0_1_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_0_1_0( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %x, %y @@ -89,8 +89,8 @@ ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %x = call <2 x i4> @gen4() @@ -104,8 +104,8 @@ define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_1_1_0( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> 
[[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, %x ; swapped order @@ -118,8 +118,8 @@ ; CHECK-LABEL: @c_1_0_1( ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %y = call <2 x i4> @gen4() @@ -133,8 +133,8 @@ ; CHECK-LABEL: @c_0_1_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %x = call <2 x i4> @gen4() @@ -149,8 +149,8 @@ ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[X]], [[N1]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %x = call <2 x i4> @gen4()