Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1350,6 +1350,41 @@ return NewBO; } +/// Match a shuffle-select-shuffle pattern where the shuffles are widening and +/// narrowing (concatenating with undef and extracting back to the original +/// length). This allows replacing the wide select with a narrow select. +Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, + InstCombiner::BuilderTy &Builder) { + // This must be a narrowing identity shuffle. It extracts the 1st N elements + // of the 1st vector operand of a shuffle. + if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract()) + return nullptr; + + // The vector being shuffled must be a vector select that we can eliminate. + // TODO: The one-use requirement could be eased if X and/or Y are constants. + Value *Cond, *X, *Y; + if (!match(Shuf.getOperand(0), + m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y))))) + return nullptr; + + // We need a narrow condition value. It must be extended with undef elements + // and have the same number of elements as this shuffle. + unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements(); + Value *NarrowCond; + if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(), + m_Constant()))) || + NarrowCond->getType()->getVectorNumElements() != NarrowNumElts || + !cast(Cond)->isIdentityWithPadding()) + return nullptr; + + // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) --> + // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask) + Value *Undef = UndefValue::get(X->getType()); + Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask()); + Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask()); + return SelectInst::Create(NarrowCond, NarrowX, NarrowY); +} + Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -1360,6 +1395,9 @@ if (Instruction *I = foldSelectShuffle(SVI, Builder, DL)) return I; + if (Instruction *I = narrowVectorSelect(SVI, Builder)) + return I; + unsigned VWidth = SVI.getType()->getVectorNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); Index: test/Transforms/InstCombine/shuffle-select-narrow.ll =================================================================== --- test/Transforms/InstCombine/shuffle-select-narrow.ll +++ test/Transforms/InstCombine/shuffle-select-narrow.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s -; TODO: Narrow the select operands to eliminate the existing shuffles and replace a wide select with a narrow select. +; Narrow the select operands to eliminate the existing shuffles and replace a wide select with a narrow select. define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> -; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> @@ -31,13 +31,13 @@ ret <2 x i8> %r } -; TODO: Verify that undef elements are acceptable for identity shuffle mask. Also check FP types. +; Verify that undef elements are acceptable for identity shuffle mask. Also check FP types. define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_undefs( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <3 x i1> [[CMP:%.*]], <3 x i1> undef, <4 x i32> -; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[WIDESEL]], <4 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]] ; CHECK-NEXT: ret <3 x float> [[R]] ; %widecmp = shufflevector <3 x i1> %cmp, <3 x i1> undef, <4 x i32> @@ -49,6 +49,8 @@ declare void @use(<4 x i8>) declare void @use_cmp(<4 x i1>) +; Negative test - extra use would require more instructions than we started with. + define <2 x i8> @narrow_shuffle_of_select_use1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_use1( ; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> @@ -64,6 +66,8 @@ ret <2 x i8> %r } +; Negative test - extra use would require more instructions than we started with. + define <2 x i8> @narrow_shuffle_of_select_use2(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_use2( ; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> @@ -79,6 +83,8 @@ ret <2 x i8> %r } +; Negative test - mismatched types would require extra shuffling. + define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types1( ; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> @@ -92,6 +98,8 @@ ret <3 x i8> %r } +; Negative test - mismatched types would require extra shuffling. + define <3 x i8> @narrow_shuffle_of_select_mismatch_types2(<4 x i1> %cmp, <6 x i8> %x, <6 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types2( ; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <6 x i32> @@ -105,13 +113,11 @@ ret <3 x i8> %r } -; TODO: Narrowing constants does not require creating new narrowing shuffle instructions. +; Narrowing constants does not require creating new narrowing shuffle instructions. define <2 x i8> @narrow_shuffle_of_select_consts(<2 x i1> %cmp) { ; CHECK-LABEL: @narrow_shuffle_of_select_consts( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> -; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> , <4 x i8> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> , <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> @@ -121,15 +127,11 @@ } ; PR38691 - https://bugs.llvm.org/show_bug.cgi?id=38691 -; TODO: If the operands are widened only to be narrowed back, then all of the shuffles are unnecessary. +; If the operands are widened only to be narrowed back, then all of the shuffles are unnecessary. define <2 x i8> @narrow_shuffle_of_select_with_widened_ops(<2 x i1> %cmp, <2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_with_widened_ops( -; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> -; CHECK-NEXT: [[WIDEY:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> undef, <4 x i32> -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> -; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[WIDEX]], <4 x i8> [[WIDEY]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32>