Commit 9bece44

Committed Aug 7, 2019
[InstCombine] Recommit: Shift amount reassociation: shl-trunc-shl pattern
This was initially committed in r368059 but got reverted in r368084 because the handling of a type mismatch between the two shift amounts was faulty (there simply was none). I've added an explicit bailout before we call SimplifyAddInst() - I don't think it is designed to handle differently-typed values in general, even though the actual problem only comes from ConstantExprs. I have also changed the common-type deduction to not just blindly look past zext, but to do so only when the types end up matching.

Differential Revision: https://reviews.llvm.org/D65380

llvm-svn: 368141
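
For readers skimming the log: the fold being (re)enabled here rewrites `(trunc(x << c1)) << c2` into `trunc(x << (c1 + c2))` when the combined amount still fits the wide type. That identity can be sanity-checked outside of LLVM; the following standalone C++ sketch is purely illustrative (it is not part of the commit, and the function names are made up):

```cpp
#include <cassert>
#include <cstdint>

// Original shape: wide (i32) shl, trunc to 16 bits, narrow (i16) shl.
static uint16_t shlTruncShl(uint32_t x, unsigned wideAmt, unsigned narrowAmt) {
  uint16_t t = static_cast<uint16_t>(x << wideAmt);
  return static_cast<uint16_t>(static_cast<uint32_t>(t) << narrowAmt);
}

// Reassociated shape: one wide shl by the summed amount, then trunc.
static uint16_t shlThenTrunc(uint32_t x, unsigned wideAmt, unsigned narrowAmt) {
  return static_cast<uint16_t>(x << (wideAmt + narrowAmt));
}

int main() {
  for (uint32_t x : {0u, 1u, 0x1234u, 0xDEADBEEFu, 0xFFFFFFFFu})
    for (unsigned wideAmt = 0; wideAmt < 32; ++wideAmt)
      for (unsigned narrowAmt = 0; narrowAmt < 16; ++narrowAmt)
        if (wideAmt + narrowAmt < 32) // the bound the transform checks
          assert(shlTruncShl(x, wideAmt, narrowAmt) ==
                 shlThenTrunc(x, wideAmt, narrowAmt));
  return 0;
}
```

Right shifts do not enjoy the same identity once a trunc is involved, which is why the patch only handles `shl` in that case; see the counterexample after the InstCombineShifts.cpp hunk below.
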
1 parent 12d21fc · commit 9bece44

File tree: 3 files changed, +112 -68 lines changed

llvm/include/llvm/IR/PatternMatch.h

+6
@@ -1270,6 +1270,12 @@ inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::ZExt>(Op);
 }
 
+template <typename OpTy>
+inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
+m_ZExtOrSelf(const OpTy &Op) {
+  return m_CombineOr(m_ZExt(Op), Op);
+}
+
 template <typename OpTy>
 inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
                         CastClass_match<OpTy, Instruction::SExt>>
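
The new combinator simply tries `m_ZExt(Op)` first and falls back to `Op` itself. A hedged usage sketch, mirroring the call sites added in InstCombineShifts.cpp below (the helper name is made up):

```cpp
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace PatternMatch;

// Returns the shift amount with any zero-extension peeled off.
// Because the fallback pattern is m_Value(), this match always succeeds;
// Inner is bound either to the zext's operand or to V itself.
static Value *peelZExt(Value *V) {
  Value *Inner = nullptr;
  if (match(V, m_ZExtOrSelf(m_Value(Inner))))
    return Inner;
  return V; // Unreachable in practice; kept for clarity.
}
```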

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

+72 -24
@@ -27,42 +27,90 @@ using namespace PatternMatch;
 // This is valid for any shift, but they must be identical.
 static Instruction *
 reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
-                                             const SimplifyQuery &SQ) {
-  // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1
-  Value *X, *ShAmt1, *ShAmt0;
+                                             const SimplifyQuery &SQ,
+                                             InstCombiner::BuilderTy &Builder) {
+  // Look for a shift of some instruction, ignore zext of shift amount if any.
+  Instruction *Sh0Op0;
+  Value *ShAmt0;
+  if (!match(Sh0,
+             m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0)))))
+    return nullptr;
+
+  // If there is a truncation between the two shifts, we must make note of it
+  // and look through it. The truncation imposes additional constraints on the
+  // transform.
   Instruction *Sh1;
-  if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)),
-                                       m_Instruction(Sh1)),
-                          m_Value(ShAmt0))))
+  Value *Trunc = nullptr;
+  match(Sh0Op0,
+        m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)),
+                    m_Instruction(Sh1)));
+
+  // Inner shift: (x shiftopcode ShAmt1)
+  // Like with other shift, ignore zext of shift amount if any.
+  Value *X, *ShAmt1;
+  if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
+    return nullptr;
+
+  // We have two shift amounts from two different shifts. The types of those
+  // shift amounts may not match. If that's the case let's bailout now..
+  if (ShAmt0->getType() != ShAmt1->getType())
     return nullptr;
 
   // The shift opcodes must be identical.
   Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
   if (ShiftOpcode != Sh1->getOpcode())
     return nullptr;
+
+  // Did we match a pattern with truncation ?
+  if (Trunc) {
+    // For right-shifts we can't do any such simplifications. Leave as-is.
+    if (ShiftOpcode != Instruction::BinaryOps::Shl)
+      return nullptr; // FIXME: still could perform constant-folding.
+    // If we saw truncation, we'll need to produce extra instruction,
+    // and for that one of the operands of the shift must be one-use.
+    if (!match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
+      return nullptr;
+  }
+
   // Can we fold (ShAmt0+ShAmt1) ?
-  Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1,
-                                  SQ.getWithInstruction(Sh0));
+  auto *NewShAmt = dyn_cast_or_null<Constant>(
+      SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
+                      SQ.getWithInstruction(Sh0)));
   if (!NewShAmt)
     return nullptr; // Did not simplify.
-  // Is the new shift amount smaller than the bit width?
-  // FIXME: could also rely on ConstantRange.
-  unsigned BitWidth = X->getType()->getScalarSizeInBits();
-  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
-                                          APInt(BitWidth, BitWidth))))
-    return nullptr;
+  // Is the new shift amount smaller than the bit width of inner shift?
+  if (!match(NewShAmt, m_SpecificInt_ICMP(
+                           ICmpInst::Predicate::ICMP_ULT,
+                           APInt(NewShAmt->getType()->getScalarSizeInBits(),
+                                 X->getType()->getScalarSizeInBits()))))
+    return nullptr; // FIXME: could perform constant-folding.
+
   // All good, we can do this fold.
+  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType());
+
   BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
-  // If both of the original shifts had the same flag set, preserve the flag.
-  if (ShiftOpcode == Instruction::BinaryOps::Shl) {
-    NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
-                                   Sh1->hasNoUnsignedWrap());
-    NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
-                                 Sh1->hasNoSignedWrap());
-  } else {
-    NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+
+  // The flags can only be propagated if there wasn't a trunc.
+  if (!Trunc) {
+    // If the pattern did not involve trunc, and both of the original shifts
+    // had the same flag set, preserve the flag.
+    if (ShiftOpcode == Instruction::BinaryOps::Shl) {
+      NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
+                                     Sh1->hasNoUnsignedWrap());
+      NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
+                                   Sh1->hasNoSignedWrap());
+    } else {
+      NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+    }
+  }
+
+  Instruction *Ret = NewShift;
+  if (Trunc) {
+    Builder.Insert(NewShift);
+    Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType());
   }
-  return NewShift;
+
+  return Ret;
 }
 
 // If we have some pattern that leaves only some low bits set, and then performs
@@ -158,7 +206,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
     return Res;
 
   if (Instruction *NewShift =
-          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))
+          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ, Builder))
     return NewShift;
 
   // (C1 shift (A add C2)) -> (C1 shift C2) shift A)
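
One subtlety worth spelling out is the `if (ShiftOpcode != Instruction::BinaryOps::Shl)` bailout in the `Trunc` case: once the truncation has dropped the high bits, a later logical right shift can no longer be folded into a single wide `lshr`. A small standalone C++ illustration (not part of the commit; values are chosen to produce a counterexample):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t x = 0x0FF00000u;
  unsigned a1 = 4, a0 = 4; // inner (i32) and outer (i16) shift amounts

  // Original pattern: wide lshr, trunc to i16, narrow lshr.
  uint16_t original =
      static_cast<uint16_t>(static_cast<uint16_t>(x >> a1) >> a0);

  // Hypothetical reassociated form: one wide lshr, then trunc.
  uint16_t folded = static_cast<uint16_t>(x >> (a1 + a0));

  std::printf("original = 0x%04x, folded = 0x%04x\n",
              static_cast<unsigned>(original), static_cast<unsigned>(folded));
  // Prints original = 0x0000, folded = 0xf000 -- the values differ, so the
  // transform is only performed for shl when a trunc sits between the shifts.
  return 0;
}
```

For `shl`, by contrast, the bits lost to the truncation would have been shifted out anyway, which is exactly the identity exercised in the sketch near the top of this page.
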

llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll

+34 -44
@@ -12,12 +12,8 @@
 
 define i16 @t0(i32 %x, i16 %y) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
@@ -31,12 +27,8 @@ define i16 @t0(i32 %x, i16 %y) {
 
 define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i16> <i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -24, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
   %t0 = sub <2 x i16> <i16 32, i16 32>, %y
@@ -50,12 +42,8 @@ define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 
 define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t2_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i16> <i16 32, i16 30>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -24, i16 0>
-; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 30>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
   %t0 = sub <2 x i16> <i16 32, i16 30>, %y
@@ -71,12 +59,8 @@ define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
 
 define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t3_vec_nonsplat_undef0(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 -24, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
@@ -90,12 +74,8 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
 
 define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t4_vec_nonsplat_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 undef, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y
@@ -109,12 +89,8 @@ define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 
 define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t5_vec_nonsplat_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 undef, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
@@ -137,9 +113,9 @@ define i16 @t6_extrause0(i32 %x, i16 %y) {
 ; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
 ; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
 ; CHECK-NEXT:    call void @use16(i16 [[T3]])
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
@@ -154,13 +130,10 @@ define i16 @t6_extrause0(i32 %x, i16 %y) {
 
 define i16 @t7_extrause1(i32 %x, i16 %y) {
 ; CHECK-LABEL: @t7_extrause1(
-; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y:%.*]], -24
 ; CHECK-NEXT:    call void @use16(i16 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
@@ -252,3 +225,20 @@ define i16 @n11(i32 %x, i16 %y) {
   %t5 = shl i16 %t3, %t4
   ret i16 %t3
 }
+
+; Bit width mismatch of shift amount
+
+@Y32 = global i32 42
+@Y16 = global i16 42
+define i16 @t01(i32 %x) {
+; CHECK-LABEL: @t01(
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], ptrtoint (i32* @Y32 to i32)
+; CHECK-NEXT:    [[T1:%.*]] = trunc i32 [[T0]] to i16
+; CHECK-NEXT:    [[T2:%.*]] = shl i16 [[T1]], ptrtoint (i16* @Y16 to i16)
+; CHECK-NEXT:    ret i16 [[T2]]
+;
+  %t0 = shl i32 %x, ptrtoint (i32* @Y32 to i32)
+  %t1 = trunc i32 %t0 to i16
+  %t2 = shl i16 %t1, ptrtoint (i16* @Y16 to i16)
+  ret i16 %t2
+}
