Skip to content

Commit 367c2ae

Browse files
author
Daniel Neilson
committedApr 5, 2018
[InstCombine] Properly change GEP type when reassociating loop invariant GEP chains
Summary: This is a fix to PR37005. Essentially, rL328539 ([InstCombine] reassociate loop invariant GEP chains to enable LICM) contains a bug whereby it will convert: %src = getelementptr inbounds i8, i8* %base, <2 x i64> %val %res = getelementptr inbounds i8, <2 x i8*> %src, i64 %val2 into: %src = getelementptr inbounds i8, i8* %base, i64 %val2 %res = getelementptr inbounds i8, <2 x i8*> %src, <2 x i64> %val By swapping the index operands if the GEPs are in a loop, and %val is loop variant while %val2 is loop invariant. This fix recreates new GEP instructions if the index operand swap would result in the type of %src changing from vector to scalar, or vice versa. Reviewers: sebpop, spatel Reviewed By: sebpop Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D45287 llvm-svn: 329331
1 parent 9eec202 commit 367c2ae

File tree

2 files changed

+133
-3
lines changed

2 files changed

+133
-3
lines changed
 

‎llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,9 +1680,42 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
16801680
// invariant: this breaks the dependence between GEPs and allows LICM
16811681
// to hoist the invariant part out of the loop.
16821682
if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
1683-
Src->setOperand(1, GO1);
1684-
GEP.setOperand(1, SO1);
1685-
return &GEP;
1683+
// We have to be careful here.
1684+
// We have something like:
1685+
// %src = getelementptr <ty>, <ty>* %base, <ty> %idx
1686+
// %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
1687+
// If we just swap idx & idx2 then we could inadvertantly
1688+
// change %src from a vector to a scalar, or vice versa.
1689+
// Cases:
1690+
// 1) %base a scalar & idx a scalar & idx2 a vector
1691+
// => Swapping idx & idx2 turns %src into a vector type.
1692+
// 2) %base a scalar & idx a vector & idx2 a scalar
1693+
// => Swapping idx & idx2 turns %src in a scalar type
1694+
// 3) %base, %idx, and %idx2 are scalars
1695+
// => %src & %gep are scalars
1696+
// => swapping idx & idx2 is safe
1697+
// 4) %base a vector
1698+
// => %src is a vector
1699+
// => swapping idx & idx2 is safe.
1700+
auto *SO0 = Src->getOperand(0);
1701+
auto *SO0Ty = SO0->getType();
1702+
if (!isa<VectorType>(GEPType) || // case 3
1703+
isa<VectorType>(SO0Ty)) { // case 4
1704+
Src->setOperand(1, GO1);
1705+
GEP.setOperand(1, SO1);
1706+
return &GEP;
1707+
} else {
1708+
// Case 1 or 2
1709+
// -- have to recreate %src & %gep
1710+
// put NewSrc at same location as %src
1711+
Builder.SetInsertPoint(cast<Instruction>(PtrOp));
1712+
auto *NewSrc = cast<GetElementPtrInst>(
1713+
Builder.CreateGEP(SO0, GO1, Src->getName()));
1714+
NewSrc->setIsInBounds(Src->isInBounds());
1715+
auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1});
1716+
NewGEP->setIsInBounds(GEP.isInBounds());
1717+
return NewGEP;
1718+
}
16861719
}
16871720
}
16881721
}

‎llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,100 @@ do.end: ; preds = %do.body, %land.lhs.
8888
%cont.0 = phi i32 [ 1, %entry ], [ 0, %if.then ], [ 0, %land.lhs.true ], [ 1, %do.body ]
8989
ret i32 %cont.0
9090
}
91+
92+
declare void @blackhole(<2 x i8*>)
93+
94+
define void @PR37005(i8* %base, i8** %in) {
95+
; CHECK-LABEL: @PR37005(
96+
; CHECK-NEXT: entry:
97+
; CHECK-NEXT: br label [[LOOP:%.*]]
98+
; CHECK: loop:
99+
; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
100+
; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds i8*, i8** [[E2]], <2 x i64> <i64 0, i64 1>
101+
; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64>
102+
; CHECK-NEXT: [[LR1:%.*]] = lshr <2 x i64> [[PI1]], <i64 21, i64 21>
103+
; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[LR1]], <i64 7, i64 7>
104+
; CHECK-NEXT: [[E51:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 80
105+
; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, i8* [[E51]], <2 x i64> [[SL1]]
106+
; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]])
107+
; CHECK-NEXT: br label [[LOOP]]
108+
;
109+
entry:
110+
br label %loop
111+
112+
loop:
113+
%e1 = getelementptr inbounds i8*, i8** %in, i64 undef
114+
%e2 = getelementptr inbounds i8*, i8** %e1, i64 6
115+
%bc1 = bitcast i8** %e2 to <2 x i8*>*
116+
%e3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %bc1, i64 0, i64 0
117+
%e4 = getelementptr inbounds i8*, i8** %e3, <2 x i64> <i64 0, i64 1>
118+
%pi1 = ptrtoint <2 x i8**> %e4 to <2 x i64>
119+
%lr1 = lshr <2 x i64> %pi1, <i64 21, i64 21>
120+
%sl1 = shl nuw nsw <2 x i64> %lr1, <i64 7, i64 7>
121+
%e5 = getelementptr inbounds i8, i8* %base, <2 x i64> %sl1
122+
%e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80
123+
call void @blackhole(<2 x i8*> %e6)
124+
br label %loop
125+
}
126+
127+
define void @PR37005_2(i8* %base, i8** %in) {
128+
; CHECK-LABEL: @PR37005_2(
129+
; CHECK-NEXT: entry:
130+
; CHECK-NEXT: br label [[LOOP:%.*]]
131+
; CHECK: loop:
132+
; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
133+
; CHECK-NEXT: [[PI1:%.*]] = ptrtoint i8** [[E2]] to i64
134+
; CHECK-NEXT: [[LR1:%.*]] = lshr i64 [[PI1]], 21
135+
; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw i64 [[LR1]], 7
136+
; CHECK-NEXT: [[E51:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], <2 x i64> <i64 80, i64 60>
137+
; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E51]], i64 [[SL1]]
138+
; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]])
139+
; CHECK-NEXT: br label [[LOOP]]
140+
;
141+
entry:
142+
br label %loop
143+
144+
loop:
145+
%e1 = getelementptr inbounds i8*, i8** %in, i64 undef
146+
%e2 = getelementptr inbounds i8*, i8** %e1, i64 6
147+
%pi1 = ptrtoint i8** %e2 to i64
148+
%lr1 = lshr i64 %pi1, 21
149+
%sl1 = shl nuw nsw i64 %lr1, 7
150+
%e5 = getelementptr inbounds i8, i8* %base, i64 %sl1
151+
%e6 = getelementptr inbounds i8, i8* %e5, <2 x i64> <i64 80, i64 60>
152+
call void @blackhole(<2 x i8*> %e6)
153+
br label %loop
154+
}
155+
156+
define void @PR37005_3(<2 x i8*> %base, i8** %in) {
157+
; CHECK-LABEL: @PR37005_3(
158+
; CHECK-NEXT: entry:
159+
; CHECK-NEXT: br label [[LOOP:%.*]]
160+
; CHECK: loop:
161+
; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
162+
; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds i8*, i8** [[E2]], <2 x i64> <i64 0, i64 1>
163+
; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64>
164+
; CHECK-NEXT: [[LR1:%.*]] = lshr <2 x i64> [[PI1]], <i64 21, i64 21>
165+
; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[LR1]], <i64 7, i64 7>
166+
; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, <2 x i8*> [[BASE:%.*]], i64 80
167+
; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E5]], <2 x i64> [[SL1]]
168+
; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]])
169+
; CHECK-NEXT: br label [[LOOP]]
170+
;
171+
entry:
172+
br label %loop
173+
174+
loop:
175+
%e1 = getelementptr inbounds i8*, i8** %in, i64 undef
176+
%e2 = getelementptr inbounds i8*, i8** %e1, i64 6
177+
%bc1 = bitcast i8** %e2 to <2 x i8*>*
178+
%e3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %bc1, i64 0, i64 0
179+
%e4 = getelementptr inbounds i8*, i8** %e3, <2 x i64> <i64 0, i64 1>
180+
%pi1 = ptrtoint <2 x i8**> %e4 to <2 x i64>
181+
%lr1 = lshr <2 x i64> %pi1, <i64 21, i64 21>
182+
%sl1 = shl nuw nsw <2 x i64> %lr1, <i64 7, i64 7>
183+
%e5 = getelementptr inbounds i8, <2 x i8*> %base, <2 x i64> %sl1
184+
%e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80
185+
call void @blackhole(<2 x i8*> %e6)
186+
br label %loop
187+
}

0 commit comments

Comments
 (0)
Please sign in to comment.