Index: include/llvm/IR/PatternMatch.h =================================================================== --- include/llvm/IR/PatternMatch.h +++ include/llvm/IR/PatternMatch.h @@ -143,11 +143,39 @@ return true; } if (V->getType()->isVectorTy()) - if (const auto *C = dyn_cast(V)) + if (const auto *C = dyn_cast(V)) { if (auto *CI = dyn_cast_or_null(C->getSplatValue())) { Res = &CI->getValue(); return true; } + + // Non-splat vector constant. Find first non-undef element, and then + // check the rest of the elements - they should either be undef, + // or they should match the first non-undef element. + unsigned NumElts = V->getType()->getVectorNumElements(); + assert(NumElts != 0 && "Constant vector with no elements?"); + const APInt *FirstNonUndefElt = nullptr; + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = C->getAggregateElement(i); + if (!Elt) + return false; + if (isa(Elt)) + continue; + auto *CI = dyn_cast(Elt); + if (!CI) + return false; + if (!FirstNonUndefElt) // This is the first non-undef element. + FirstNonUndefElt = &CI->getValue(); + else if (*FirstNonUndefElt != CI->getValue()) // (still non-undef) + return false; // Different from the first non-undef element. + } + if (FirstNonUndefElt) { + // There were non-undef elements, and they all matched. + Res = FirstNonUndefElt; + return true; + } + } + return false; } }; @@ -174,6 +202,7 @@ /// Match a ConstantInt or splatted ConstantVector, binding the /// specified pointer to the contained APInt. +/// For vector constants, undefined elements are ignored. 
inline apint_match m_APInt(const APInt *&Res) { return Res; } /// Match a ConstantFP or splatted ConstantVector, binding the Index: test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll =================================================================== --- test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll +++ test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll @@ -139,9 +139,8 @@ define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP1]] ; %tmp0 = ashr <3 x i32> %x, %ret = shl <3 x i32> %tmp0, @@ -150,9 +149,8 @@ define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP1]] ; %tmp0 = ashr <3 x i32> %x, %ret = shl <3 x i32> %tmp0, @@ -161,8 +159,8 @@ define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP1]] ; %tmp0 = ashr <3 x i32> %x, %ret = shl <3 x i32> %tmp0, @@ -183,7 +181,7 @@ define <3 x i32> @positive_biggerashr_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl nsw <3 x i32> [[TMP0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = ashr <3 x i32> %x, @@ -194,7 
+192,7 @@ define <3 x i32> @positive_biggerashr_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_undef1( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl nsw <3 x i32> [[TMP0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = ashr <3 x i32> %x, @@ -205,7 +203,7 @@ define <3 x i32> @positive_biggerashr_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_undef2( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl nsw <3 x i32> [[TMP0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = ashr <3 x i32> %x, @@ -226,8 +224,8 @@ define <3 x i32> @positive_biggershl_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef0( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = ashr <3 x i32> %x, @@ -237,8 +235,8 @@ define <3 x i32> @positive_biggershl_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = ashr <3 x i32> %x, @@ -248,8 +246,8 @@ define <3 x i32> @positive_biggershl_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef2( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = ashr <3 x i32> %x, Index: 
test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll =================================================================== --- test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll +++ test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll @@ -139,9 +139,8 @@ define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( -; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i32> [[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP0]] ; %tmp0 = lshr <3 x i32> %x, %ret = shl <3 x i32> %tmp0, @@ -150,9 +149,8 @@ define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i32> [[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP0]] ; %tmp0 = lshr <3 x i32> %x, %ret = shl <3 x i32> %tmp0, @@ -161,8 +159,8 @@ define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i32> [[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP0]] ; %tmp0 = lshr <3 x i32> %x, %ret = shl <3 x i32> %tmp0, @@ -183,7 +181,7 @@ define <3 x i32> @positive_biggerlshr_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl nuw nsw <3 x i32> [[TMP0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = lshr <3 x i32> %x, @@ -194,7 +192,7 @@ define <3 x i32> @positive_biggerlshr_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_undef1( ; 
CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl nuw nsw <3 x i32> [[TMP0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = lshr <3 x i32> %x, @@ -205,7 +203,7 @@ define <3 x i32> @positive_biggerlshr_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_undef2( ; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl nuw nsw <3 x i32> [[TMP0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = lshr <3 x i32> %x, Index: test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll =================================================================== --- test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll +++ test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll @@ -138,9 +138,8 @@ define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i32> [[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP0]] ; %tmp0 = shl <3 x i32> %x, %ret = lshr <3 x i32> %tmp0, @@ -149,9 +148,8 @@ define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i32> [[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP0]] ; %tmp0 = shl <3 x i32> %x, %ret = lshr <3 x i32> %tmp0, @@ -160,8 +158,8 @@ define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i32> 
[[X:%.*]], +; CHECK-NEXT: ret <3 x i32> [[TMP0]] ; %tmp0 = shl <3 x i32> %x, %ret = lshr <3 x i32> %tmp0, @@ -181,8 +179,8 @@ define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_undef0( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = shl <3 x i32> %x, @@ -192,8 +190,8 @@ define <3 x i32> @positive_biggerShl_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = shl <3 x i32> %x, @@ -203,8 +201,8 @@ define <3 x i32> @positive_biggerShl_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_undef2( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = shl <3 x i32> %x, @@ -225,8 +223,8 @@ define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec_undef0( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = shl <3 x i32> %x, @@ -236,8 +234,8 @@ define <3 x i32> @positive_biggerLshr_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = shl <3 x i32> %x, @@ -247,8 +245,8 @@ define <3 x i32> @positive_biggerLshr_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec_undef2( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[TMP1]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = shl <3 x i32> %x, Index: test/Transforms/InstCombine/masked-merge-add.ll =================================================================== --- test/Transforms/InstCombine/masked-merge-add.ll +++ test/Transforms/InstCombine/masked-merge-add.ll @@ -108,7 +108,7 @@ ; CHECK-LABEL: @p_constmask_vec_undef( ; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], ; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], -; CHECK-NEXT: [[RET:%.*]] = add <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: [[RET:%.*]] = or <3 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %and = and <3 x i32> %x, @@ -164,7 +164,7 @@ ; CHECK-LABEL: @p_constmask2_vec_undef( ; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], ; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], -; CHECK-NEXT: [[RET:%.*]] = add <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: [[RET:%.*]] = or <3 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %and = and <3 x i32> %x, Index: test/Transforms/InstCombine/masked-merge-xor.ll =================================================================== --- test/Transforms/InstCombine/masked-merge-xor.ll +++ test/Transforms/InstCombine/masked-merge-xor.ll @@ -108,8 +108,8 @@ ; CHECK-LABEL: @p_constmask_vec_undef( ; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], ; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], -; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[AND]], [[AND1]] -; CHECK-NEXT: ret <3 x i32> 
[[RET]] +; CHECK-NEXT: [[RET1:%.*]] = or <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: ret <3 x i32> [[RET1]] ; %and = and <3 x i32> %x, %and1 = and <3 x i32> %y, @@ -164,8 +164,8 @@ ; CHECK-LABEL: @p_constmask2_vec_undef( ; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], ; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], -; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[AND]], [[AND1]] -; CHECK-NEXT: ret <3 x i32> [[RET]] +; CHECK-NEXT: [[RET1:%.*]] = or <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: ret <3 x i32> [[RET1]] ; %and = and <3 x i32> %x, %and1 = and <3 x i32> %y, Index: test/Transforms/InstCombine/vec_shuffle.ll =================================================================== --- test/Transforms/InstCombine/vec_shuffle.ll +++ test/Transforms/InstCombine/vec_shuffle.ll @@ -665,7 +665,7 @@ define <2 x i32> @ashr_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @ashr_splat_constant0( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> , [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ;