Skip to content

Commit 0dba791

Browse files
committed Aug 9, 2019
[ARM][ParallelDSP] Replace SExt uses
As loads are combined and widened, we replaced their sext users' operands, whereas we should have been replacing the uses of the sext. I've added a load of tests; only a few of them originally caused assertion failures, while the rest improve pattern coverage. Differential Revision: https://reviews.llvm.org/D65740 llvm-svn: 368404
1 parent ef0c3dd commit 0dba791

File tree

4 files changed

+497
-5
lines changed

4 files changed

+497
-5
lines changed
 

‎llvm/lib/Target/ARM/ARMParallelDSP.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ namespace {
6868
}
6969

7070
LoadInst *getBaseLoad() const {
71-
return cast<LoadInst>(LHS);
71+
return VecLd.front();
7272
}
7373
};
7474

@@ -696,13 +696,15 @@ LoadInst* ARMParallelDSP::CreateWideLoad(MemInstList &Loads,
696696
// Loads[0] needs trunc while Loads[1] needs a lshr and trunc.
697697
// TODO: Support big-endian as well.
698698
Value *Bottom = IRB.CreateTrunc(WideLoad, Base->getType());
699-
BaseSExt->setOperand(0, Bottom);
699+
Value *NewBaseSExt = IRB.CreateSExt(Bottom, BaseSExt->getType());
700+
BaseSExt->replaceAllUsesWith(NewBaseSExt);
700701

701702
IntegerType *OffsetTy = cast<IntegerType>(Offset->getType());
702703
Value *ShiftVal = ConstantInt::get(LoadTy, OffsetTy->getBitWidth());
703704
Value *Top = IRB.CreateLShr(WideLoad, ShiftVal);
704705
Value *Trunc = IRB.CreateTrunc(Top, OffsetTy);
705-
OffsetSExt->setOperand(0, Trunc);
706+
Value *NewOffsetSExt = IRB.CreateSExt(Trunc, OffsetSExt->getType());
707+
OffsetSExt->replaceAllUsesWith(NewOffsetSExt);
706708

707709
WideLoads.emplace(std::make_pair(Base,
708710
make_unique<WidenedLoad>(Loads, WideLoad)));

‎llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
66
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
77
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
8-
; CHECK call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 %acc)
8+
; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 %acc)
99
define i32 @single_block(i16* %a, i16* %b, i32 %acc) {
1010
entry:
1111
%ld.a.0 = load i16, i16* %a
@@ -30,7 +30,7 @@ entry:
3030
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
3131
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
3232
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
33-
; CHECK call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 0)
33+
; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 0)
3434
define i32 @multi_block(i16* %a, i16* %b, i32 %acc) {
3535
entry:
3636
%ld.a.0 = load i16, i16* %a
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s
2+
3+
; CHECK-LABEL: exchange_1
4+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
5+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
6+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
7+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
8+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
9+
define i32 @exchange_1(i16* %a, i16* %b, i32 %acc) {
10+
entry:
11+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
12+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
13+
%ld.a.0 = load i16, i16* %a
14+
%sext.a.0 = sext i16 %ld.a.0 to i32
15+
%ld.b.0 = load i16, i16* %b
16+
%ld.a.1 = load i16, i16* %addr.a.1
17+
%ld.b.1 = load i16, i16* %addr.b.1
18+
%sext.a.1 = sext i16 %ld.a.1 to i32
19+
%sext.b.1 = sext i16 %ld.b.1 to i32
20+
%sext.b.0 = sext i16 %ld.b.0 to i32
21+
%mul.0 = mul i32 %sext.a.0, %sext.b.1
22+
%mul.1 = mul i32 %sext.a.1, %sext.b.0
23+
%add = add i32 %mul.0, %mul.1
24+
%res = add i32 %add, %acc
25+
ret i32 %res
26+
}
27+
28+
; CHECK-LABEL: exchange_2
29+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
30+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
31+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
32+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
33+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
34+
define i32 @exchange_2(i16* %a, i16* %b, i32 %acc) {
35+
entry:
36+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
37+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
38+
%ld.a.0 = load i16, i16* %a
39+
%sext.a.0 = sext i16 %ld.a.0 to i32
40+
%ld.b.0 = load i16, i16* %b
41+
%ld.a.1 = load i16, i16* %addr.a.1
42+
%ld.b.1 = load i16, i16* %addr.b.1
43+
%sext.a.1 = sext i16 %ld.a.1 to i32
44+
%sext.b.1 = sext i16 %ld.b.1 to i32
45+
%sext.b.0 = sext i16 %ld.b.0 to i32
46+
%mul.0 = mul i32 %sext.b.1, %sext.a.0
47+
%mul.1 = mul i32 %sext.b.0, %sext.a.1
48+
%add = add i32 %mul.0, %mul.1
49+
%res = add i32 %add, %acc
50+
ret i32 %res
51+
}
52+
53+
; CHECK-LABEL: exchange_3
54+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
55+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
56+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
57+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
58+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
59+
define i32 @exchange_3(i16* %a, i16* %b, i32 %acc) {
60+
entry:
61+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
62+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
63+
%ld.a.0 = load i16, i16* %a
64+
%sext.a.0 = sext i16 %ld.a.0 to i32
65+
%ld.b.0 = load i16, i16* %b
66+
%ld.a.1 = load i16, i16* %addr.a.1
67+
%ld.b.1 = load i16, i16* %addr.b.1
68+
%sext.a.1 = sext i16 %ld.a.1 to i32
69+
%sext.b.1 = sext i16 %ld.b.1 to i32
70+
%sext.b.0 = sext i16 %ld.b.0 to i32
71+
%mul.0 = mul i32 %sext.a.0, %sext.b.1
72+
%mul.1 = mul i32 %sext.a.1, %sext.b.0
73+
%add = add i32 %mul.1, %mul.0
74+
%res = add i32 %add, %acc
75+
ret i32 %res
76+
}
77+
78+
; CHECK-LABEL: exchange_4
79+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
80+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
81+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
82+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
83+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
84+
define i32 @exchange_4(i16* %a, i16* %b, i32 %acc) {
85+
entry:
86+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
87+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
88+
%ld.a.0 = load i16, i16* %a
89+
%sext.a.0 = sext i16 %ld.a.0 to i32
90+
%ld.b.0 = load i16, i16* %b
91+
%ld.a.1 = load i16, i16* %addr.a.1
92+
%ld.b.1 = load i16, i16* %addr.b.1
93+
%sext.a.1 = sext i16 %ld.a.1 to i32
94+
%sext.b.1 = sext i16 %ld.b.1 to i32
95+
%sext.b.0 = sext i16 %ld.b.0 to i32
96+
%mul.0 = mul i32 %sext.b.1, %sext.a.0
97+
%mul.1 = mul i32 %sext.b.0, %sext.a.1
98+
%add = add i32 %mul.1, %mul.0
99+
%res = add i32 %add, %acc
100+
ret i32 %res
101+
}
102+
103+
; CHECK-LABEL: exchange_multi_use_1
104+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
105+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
106+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
107+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
108+
; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
109+
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
110+
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
111+
; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
112+
; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A_2]], i32 [[LD_B]], i32 [[X]])
113+
define i32 @exchange_multi_use_1(i16* %a, i16* %b, i32 %acc) {
114+
entry:
115+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
116+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
117+
%ld.a.0 = load i16, i16* %a
118+
%sext.a.0 = sext i16 %ld.a.0 to i32
119+
%ld.b.0 = load i16, i16* %b
120+
%ld.a.1 = load i16, i16* %addr.a.1
121+
%ld.b.1 = load i16, i16* %addr.b.1
122+
%sext.a.1 = sext i16 %ld.a.1 to i32
123+
%sext.b.1 = sext i16 %ld.b.1 to i32
124+
%sext.b.0 = sext i16 %ld.b.0 to i32
125+
%mul.0 = mul i32 %sext.a.0, %sext.b.1
126+
%mul.1 = mul i32 %sext.a.1, %sext.b.0
127+
%add = add i32 %mul.0, %mul.1
128+
%addr.a.2 = getelementptr i16, i16* %a, i32 2
129+
%addr.a.3 = getelementptr i16, i16* %a, i32 3
130+
%ld.a.2 = load i16, i16* %addr.a.2
131+
%ld.a.3 = load i16, i16* %addr.a.3
132+
%sext.a.2 = sext i16 %ld.a.2 to i32
133+
%sext.a.3 = sext i16 %ld.a.3 to i32
134+
%mul.2 = mul i32 %sext.a.3, %sext.b.1
135+
%mul.3 = mul i32 %sext.a.2, %sext.b.0
136+
%add.1 = add i32 %mul.2, %mul.3
137+
%add.2 = add i32 %add, %add.1
138+
%res = add i32 %add.2, %acc
139+
ret i32 %res
140+
}
141+
142+
; CHECK-LABEL: exchange_multi_use_2
143+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
144+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
145+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
146+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
147+
; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
148+
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
149+
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
150+
; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
151+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A_2]], i32 [[X]])
152+
define i32 @exchange_multi_use_2(i16* %a, i16* %b, i32 %acc) {
153+
entry:
154+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
155+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
156+
%ld.a.0 = load i16, i16* %a
157+
%sext.a.0 = sext i16 %ld.a.0 to i32
158+
%ld.b.0 = load i16, i16* %b
159+
%ld.a.1 = load i16, i16* %addr.a.1
160+
%ld.b.1 = load i16, i16* %addr.b.1
161+
%sext.a.1 = sext i16 %ld.a.1 to i32
162+
%sext.b.1 = sext i16 %ld.b.1 to i32
163+
%sext.b.0 = sext i16 %ld.b.0 to i32
164+
%mul.0 = mul i32 %sext.a.0, %sext.b.0
165+
%mul.1 = mul i32 %sext.a.1, %sext.b.1
166+
%add = add i32 %mul.0, %mul.1
167+
%addr.a.2 = getelementptr i16, i16* %a, i32 2
168+
%addr.a.3 = getelementptr i16, i16* %a, i32 3
169+
%ld.a.2 = load i16, i16* %addr.a.2
170+
%ld.a.3 = load i16, i16* %addr.a.3
171+
%sext.a.2 = sext i16 %ld.a.2 to i32
172+
%sext.a.3 = sext i16 %ld.a.3 to i32
173+
%mul.2 = mul i32 %sext.b.0, %sext.a.3
174+
%mul.3 = mul i32 %sext.b.1, %sext.a.2
175+
%add.1 = add i32 %mul.2, %mul.3
176+
%add.2 = add i32 %add, %add.1
177+
%res = add i32 %add.2, %acc
178+
ret i32 %res
179+
}
180+
181+
; TODO: Why aren't two intrinsics generated?
182+
; CHECK-LABEL: exchange_multi_use_3
183+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
184+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
185+
; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
186+
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
187+
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
188+
; CHECK-NOT: call i32 @llvm.arm.smlad
189+
; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A_2]], i32 0
190+
define i32 @exchange_multi_use_3(i16* %a, i16* %b, i32 %acc) {
191+
entry:
192+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
193+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
194+
%ld.a.0 = load i16, i16* %a
195+
%sext.a.0 = sext i16 %ld.a.0 to i32
196+
%ld.b.0 = load i16, i16* %b
197+
%ld.a.1 = load i16, i16* %addr.a.1
198+
%ld.b.1 = load i16, i16* %addr.b.1
199+
%sext.a.1 = sext i16 %ld.a.1 to i32
200+
%sext.b.1 = sext i16 %ld.b.1 to i32
201+
%sext.b.0 = sext i16 %ld.b.0 to i32
202+
%addr.a.2 = getelementptr i16, i16* %a, i32 2
203+
%addr.a.3 = getelementptr i16, i16* %a, i32 3
204+
%ld.a.2 = load i16, i16* %addr.a.2
205+
%ld.a.3 = load i16, i16* %addr.a.3
206+
%sext.a.2 = sext i16 %ld.a.2 to i32
207+
%sext.a.3 = sext i16 %ld.a.3 to i32
208+
%mul.2 = mul i32 %sext.b.0, %sext.a.3
209+
%mul.3 = mul i32 %sext.b.1, %sext.a.2
210+
%mul.0 = mul i32 %sext.a.0, %sext.b.0
211+
%mul.1 = mul i32 %sext.a.1, %sext.b.1
212+
%add = add i32 %mul.0, %mul.1
213+
%add.1 = add i32 %mul.2, %mul.3
214+
%sub = sub i32 %add, %add.1
215+
%res = add i32 %acc, %sub
216+
ret i32 %res
217+
}
218+
219+
; TODO: Why isn't smladx generated too?
220+
; CHECK-LABEL: exchange_multi_use_4
221+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
222+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
223+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
224+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
225+
; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 0
226+
; CHECK-NOT: call i32 @llvm.arm.smlad
227+
define i32 @exchange_multi_use_4(i16* %a, i16* %b, i32 %acc) {
228+
entry:
229+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
230+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
231+
%ld.a.0 = load i16, i16* %a
232+
%sext.a.0 = sext i16 %ld.a.0 to i32
233+
%ld.b.0 = load i16, i16* %b
234+
%ld.a.1 = load i16, i16* %addr.a.1
235+
%ld.b.1 = load i16, i16* %addr.b.1
236+
%sext.a.1 = sext i16 %ld.a.1 to i32
237+
%sext.b.1 = sext i16 %ld.b.1 to i32
238+
%sext.b.0 = sext i16 %ld.b.0 to i32
239+
%addr.a.2 = getelementptr i16, i16* %a, i32 2
240+
%addr.a.3 = getelementptr i16, i16* %a, i32 3
241+
%ld.a.2 = load i16, i16* %addr.a.2
242+
%ld.a.3 = load i16, i16* %addr.a.3
243+
%sext.a.2 = sext i16 %ld.a.2 to i32
244+
%sext.a.3 = sext i16 %ld.a.3 to i32
245+
%mul.2 = mul i32 %sext.b.0, %sext.a.3
246+
%mul.3 = mul i32 %sext.b.1, %sext.a.2
247+
%mul.0 = mul i32 %sext.a.0, %sext.b.0
248+
%mul.1 = mul i32 %sext.a.1, %sext.b.1
249+
%add.1 = add i32 %mul.2, %mul.3
250+
%add = add i32 %mul.0, %mul.1
251+
%sub = sub i32 %add, %add.1
252+
%res = add i32 %acc, %sub
253+
ret i32 %res
254+
}
255+
256+
; CHECK-LABEL: exchange_swap
257+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
258+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
259+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
260+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
261+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
262+
define i32 @exchange_swap(i16* %a, i16* %b, i32 %acc) {
263+
entry:
264+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
265+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
266+
%ld.a.0 = load i16, i16* %a
267+
%sext.a.0 = sext i16 %ld.a.0 to i32
268+
%ld.b.0 = load i16, i16* %b
269+
%ld.a.1 = load i16, i16* %addr.a.1
270+
%ld.b.1 = load i16, i16* %addr.b.1
271+
%sext.a.1 = sext i16 %ld.a.1 to i32
272+
%sext.b.1 = sext i16 %ld.b.1 to i32
273+
%sext.b.0 = sext i16 %ld.b.0 to i32
274+
%mul.0 = mul i32 %sext.a.1, %sext.b.0
275+
%mul.1 = mul i32 %sext.a.0, %sext.b.1
276+
%add = add i32 %mul.0, %mul.1
277+
%res = add i32 %add, %acc
278+
ret i32 %res
279+
}
280+
281+
; CHECK-LABEL: exchange_swap_2
282+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
283+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
284+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
285+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
286+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
287+
define i32 @exchange_swap_2(i16* %a, i16* %b, i32 %acc) {
288+
entry:
289+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
290+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
291+
%ld.a.0 = load i16, i16* %a
292+
%sext.a.0 = sext i16 %ld.a.0 to i32
293+
%ld.b.0 = load i16, i16* %b
294+
%ld.a.1 = load i16, i16* %addr.a.1
295+
%ld.b.1 = load i16, i16* %addr.b.1
296+
%sext.a.1 = sext i16 %ld.a.1 to i32
297+
%sext.b.1 = sext i16 %ld.b.1 to i32
298+
%sext.b.0 = sext i16 %ld.b.0 to i32
299+
%mul.0 = mul i32 %sext.a.1, %sext.b.0
300+
%mul.1 = mul i32 %sext.a.0, %sext.b.1
301+
%add = add i32 %mul.1, %mul.0
302+
%res = add i32 %add, %acc
303+
ret i32 %res
304+
}
305+
306+
; CHECK-LABEL: exchange_swap_3
307+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
308+
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
309+
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
310+
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
311+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
312+
define i32 @exchange_swap_3(i16* %a, i16* %b, i32 %acc) {
313+
entry:
314+
%addr.a.1 = getelementptr i16, i16* %a, i32 1
315+
%addr.b.1 = getelementptr i16, i16* %b, i32 1
316+
%ld.a.0 = load i16, i16* %a
317+
%sext.a.0 = sext i16 %ld.a.0 to i32
318+
%ld.b.0 = load i16, i16* %b
319+
%ld.a.1 = load i16, i16* %addr.a.1
320+
%ld.b.1 = load i16, i16* %addr.b.1
321+
%sext.a.1 = sext i16 %ld.a.1 to i32
322+
%sext.b.1 = sext i16 %ld.b.1 to i32
323+
%sext.b.0 = sext i16 %ld.b.0 to i32
324+
%mul.0 = mul i32 %sext.b.0, %sext.a.1
325+
%mul.1 = mul i32 %sext.b.1, %sext.a.0
326+
%add = add i32 %mul.1, %mul.0
327+
%res = add i32 %add, %acc
328+
ret i32 %res
329+
}

0 commit comments

Comments
 (0)
Please sign in to comment.