Skip to content

Commit c1416b6

Browse files
committed Sep 7, 2018
[InstCombine] narrow vector select with padded condition and extracted result (PR38691)
shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)

The motivating case, from https://bugs.llvm.org/show_bug.cgi?id=38691, is the last regression test. In that case, we're just left with the narrow select.

Note that if we do create new shuffles, they use the existing extraction identity mask, so there's no danger that this transform creates arbitrary shuffles.

Differential Revision: https://reviews.llvm.org/D51496

llvm-svn: 341708
1 parent 653278f commit c1416b6

File tree

2 files changed

+62
-22
lines changed

2 files changed

+62
-22
lines changed
 

‎llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -1350,6 +1350,41 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
13501350
return NewBO;
13511351
}
13521352

1353+
/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1354+
/// narrowing (concatenating with undef and extracting back to the original
1355+
/// length). This allows replacing the wide select with a narrow select.
1356+
Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
1357+
InstCombiner::BuilderTy &Builder) {
1358+
// This must be a narrowing identity shuffle. It extracts the 1st N elements
1359+
// of the 1st vector operand of a shuffle.
1360+
if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1361+
return nullptr;
1362+
1363+
// The vector being shuffled must be a vector select that we can eliminate.
1364+
// TODO: The one-use requirement could be eased if X and/or Y are constants.
1365+
Value *Cond, *X, *Y;
1366+
if (!match(Shuf.getOperand(0),
1367+
m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1368+
return nullptr;
1369+
1370+
// We need a narrow condition value. It must be extended with undef elements
1371+
// and have the same number of elements as this shuffle.
1372+
unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1373+
Value *NarrowCond;
1374+
if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1375+
m_Constant()))) ||
1376+
NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
1377+
!cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1378+
return nullptr;
1379+
1380+
// shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
1381+
// sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1382+
Value *Undef = UndefValue::get(X->getType());
1383+
Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1384+
Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1385+
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1386+
}
1387+
13531388
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
13541389
Value *LHS = SVI.getOperand(0);
13551390
Value *RHS = SVI.getOperand(1);
@@ -1360,6 +1395,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
13601395
if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
13611396
return I;
13621397

1398+
if (Instruction *I = narrowVectorSelect(SVI, Builder))
1399+
return I;
1400+
13631401
unsigned VWidth = SVI.getType()->getVectorNumElements();
13641402
APInt UndefElts(VWidth, 0);
13651403
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));

‎llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll

+24-22
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -instcombine -S | FileCheck %s
33

4-
; TODO: Narrow the select operands to eliminate the existing shuffles and replace a wide select with a narrow select.
4+
; Narrow the select operands to eliminate the existing shuffles and replace a wide select with a narrow select.
55

66
define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
77
; CHECK-LABEL: @narrow_shuffle_of_select(
8-
; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
9-
; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]
10-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
8+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
9+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
10+
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
1111
; CHECK-NEXT: ret <2 x i8> [[R]]
1212
;
1313
%widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -31,13 +31,13 @@ define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4
3131
ret <2 x i8> %r
3232
}
3333

34-
; TODO: Verify that undef elements are acceptable for identity shuffle mask. Also check FP types.
34+
; Verify that undef elements are acceptable for identity shuffle mask. Also check FP types.
3535

3636
define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) {
3737
; CHECK-LABEL: @narrow_shuffle_of_select_undefs(
38-
; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <3 x i1> [[CMP:%.*]], <3 x i1> undef, <4 x i32> <i32 undef, i32 1, i32 2, i32 undef>
39-
; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]
40-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[WIDESEL]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
38+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
39+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
40+
; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]]
4141
; CHECK-NEXT: ret <3 x float> [[R]]
4242
;
4343
%widecmp = shufflevector <3 x i1> %cmp, <3 x i1> undef, <4 x i32> <i32 undef, i32 1, i32 2, i32 undef>
@@ -49,36 +49,42 @@ define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %
4949
declare void @use(<4 x i8>)
5050
declare void @use_cmp(<4 x i1>)
5151

52+
; Negative test - extra use would require more instructions than we started with.
53+
5254
define <2 x i8> @narrow_shuffle_of_select_use1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
5355
; CHECK-LABEL: @narrow_shuffle_of_select_use1(
5456
; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
5557
; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]
56-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
5758
; CHECK-NEXT: call void @use(<4 x i8> [[WIDESEL]])
59+
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
5860
; CHECK-NEXT: ret <2 x i8> [[R]]
5961
;
6062
%widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
6163
%widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y
62-
%r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
6364
call void @use(<4 x i8> %widesel)
65+
%r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
6466
ret <2 x i8> %r
6567
}
6668

69+
; Negative test - extra use would require more instructions than we started with.
70+
6771
define <2 x i8> @narrow_shuffle_of_select_use2(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
6872
; CHECK-LABEL: @narrow_shuffle_of_select_use2(
6973
; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
74+
; CHECK-NEXT: call void @use_cmp(<4 x i1> [[WIDECMP]])
7075
; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]
7176
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
72-
; CHECK-NEXT: call void @use_cmp(<4 x i1> [[WIDECMP]])
7377
; CHECK-NEXT: ret <2 x i8> [[R]]
7478
;
7579
%widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
80+
call void @use_cmp(<4 x i1> %widecmp)
7681
%widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y
7782
%r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
78-
call void @use_cmp(<4 x i1> %widecmp)
7983
ret <2 x i8> %r
8084
}
8185

86+
; Negative test - mismatched types would require extra shuffling.
87+
8288
define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
8389
; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types1(
8490
; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -92,6 +98,8 @@ define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8
9298
ret <3 x i8> %r
9399
}
94100

101+
; Negative test - mismatched types would require extra shuffling.
102+
95103
define <3 x i8> @narrow_shuffle_of_select_mismatch_types2(<4 x i1> %cmp, <6 x i8> %x, <6 x i8> %y) {
96104
; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types2(
97105
; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef>
@@ -105,13 +113,11 @@ define <3 x i8> @narrow_shuffle_of_select_mismatch_types2(<4 x i1> %cmp, <6 x i8
105113
ret <3 x i8> %r
106114
}
107115

108-
; TODO: Narrowing constants does not require creating new narrowing shuffle instructions.
116+
; Narrowing constants does not require creating new narrowing shuffle instructions.
109117

110118
define <2 x i8> @narrow_shuffle_of_select_consts(<2 x i1> %cmp) {
111119
; CHECK-LABEL: @narrow_shuffle_of_select_consts(
112-
; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
113-
; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> <i8 -1, i8 -2, i8 undef, i8 undef>, <4 x i8> <i8 1, i8 2, i8 undef, i8 undef>
114-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
120+
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> <i8 -1, i8 -2>, <2 x i8> <i8 1, i8 2>
115121
; CHECK-NEXT: ret <2 x i8> [[R]]
116122
;
117123
%widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -121,15 +127,11 @@ define <2 x i8> @narrow_shuffle_of_select_consts(<2 x i1> %cmp) {
121127
}
122128

123129
; PR38691 - https://bugs.llvm.org/show_bug.cgi?id=38691
124-
; TODO: If the operands are widened only to be narrowed back, then all of the shuffles are unnecessary.
130+
; If the operands are widened only to be narrowed back, then all of the shuffles are unnecessary.
125131

126132
define <2 x i8> @narrow_shuffle_of_select_with_widened_ops(<2 x i1> %cmp, <2 x i8> %x, <2 x i8> %y) {
127133
; CHECK-LABEL: @narrow_shuffle_of_select_with_widened_ops(
128-
; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
129-
; CHECK-NEXT: [[WIDEY:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
130-
; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
131-
; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[WIDEX]], <4 x i8> [[WIDEY]]
132-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
134+
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]
133135
; CHECK-NEXT: ret <2 x i8> [[R]]
134136
;
135137
%widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>

0 commit comments

Comments
 (0)
Please sign in to comment.