Skip to content

Commit 5ab7795

Browse files
committedMay 30, 2014
Allow vectorization of intrinsics such as powi, cttz and ctlz in the Loop and SLP Vectorizers.
This patch adds support for vectorizing intrinsics such as powi, cttz and ctlz in the Loop and SLP vectorizers. These intrinsics differ from other intrinsics in that their second argument must be the same in every call being vectorized, and that argument must remain a scalar in the vectorized call. Review: http://reviews.llvm.org/D3851#inline-32769 and http://reviews.llvm.org/D3937#inline-32857 llvm-svn: 209873
1 parent 6cd3ebb commit 5ab7795

File tree

5 files changed

+426
-2
lines changed

5 files changed

+426
-2
lines changed
 

‎llvm/include/llvm/Transforms/Utils/VectorUtils.h

+15
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,27 @@ static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
4848
case Intrinsic::pow:
4949
case Intrinsic::fma:
5050
case Intrinsic::fmuladd:
51+
case Intrinsic::ctlz:
52+
case Intrinsic::cttz:
53+
case Intrinsic::powi:
5154
return true;
5255
default:
5356
return false;
5457
}
5558
}
5659

60+
// Reports whether the intrinsic ID has a scalar operand at argument index
// ScalarOpdIdx. Only ctlz, cttz and powi are recognised here, and only their
// second argument (index 1) is treated as scalar; every other intrinsic or
// index yields false. The vectorizers use this to keep that argument as-is
// instead of widening it.
static bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
61+
unsigned ScalarOpdIdx) {
62+
switch (ID) {
63+
case Intrinsic::ctlz:
64+
case Intrinsic::cttz:
65+
case Intrinsic::powi:
66+
// For these intrinsics only the second argument is the scalar one.
return (ScalarOpdIdx == 1);
67+
default:
68+
return false;
69+
}
70+
}
71+
5772
static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
5873
Intrinsic::ID ValidIntrinsicID) {
5974
if (I.getNumArgOperands() != 1 ||

‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -3123,9 +3123,14 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
31233123
scalarizeInstruction(it);
31243124
break;
31253125
default:
3126+
bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
31263127
for (unsigned Part = 0; Part < UF; ++Part) {
31273128
SmallVector<Value *, 4> Args;
31283129
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
3130+
if (HasScalarOpd && i == 1) {
3131+
Args.push_back(CI->getArgOperand(i));
3132+
continue;
3133+
}
31293134
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
31303135
Args.push_back(Arg[Part]);
31313136
}
@@ -3474,6 +3479,16 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
34743479
return false;
34753480
}
34763481

3482+
// Intrinsics such as powi, cttz and ctlz are legal to vectorize if the
3483+
// second argument is the same in every iteration (i.e. loop invariant).
3484+
if (CI &&
3485+
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
3486+
if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
3487+
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
3488+
return false;
3489+
}
3490+
}
3491+
34773492
// Check that the instruction return type is vectorizable.
34783493
// Also, we can't vectorize extractelement instructions.
34793494
if ((!VectorType::isValidElementType(it->getType()) &&

‎llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+27-2
Original file line numberDiff line numberDiff line change
@@ -961,9 +961,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
961961
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
962962
return;
963963
}
964-
965964
Function *Int = CI->getCalledFunction();
966-
965+
Value *A1I = nullptr;
966+
if (hasVectorInstrinsicScalarOpd(ID, 1))
967+
A1I = CI->getArgOperand(1);
967968
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
968969
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
969970
if (!CI2 || CI2->getCalledFunction() != Int ||
@@ -973,6 +974,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
973974
<< "\n");
974975
return;
975976
}
977+
// ctlz, cttz and powi are special intrinsics whose second argument
978+
// should be the same in order for them to be vectorized.
979+
if (hasVectorInstrinsicScalarOpd(ID, 1)) {
980+
Value *A1J = CI2->getArgOperand(1);
981+
if (A1I != A1J) {
982+
newTreeEntry(VL, false);
983+
DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
984+
<< " argument "<< A1I<<"!=" << A1J
985+
<< "\n");
986+
return;
987+
}
988+
}
976989
}
977990

978991
newTreeEntry(VL, true);
@@ -1652,9 +1665,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
16521665
case Instruction::Call: {
16531666
CallInst *CI = cast<CallInst>(VL0);
16541667
setInsertPointAfterBundle(E->Scalars);
1668+
Function *FI;
1669+
Intrinsic::ID IID = Intrinsic::not_intrinsic;
1670+
if (CI && (FI = CI->getCalledFunction())) {
1671+
IID = (Intrinsic::ID) FI->getIntrinsicID();
1672+
}
16551673
std::vector<Value *> OpVecs;
16561674
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
16571675
ValueList OpVL;
1676+
// ctlz,cttz and powi are special intrinsics whose second argument is
1677+
// a scalar. This argument should not be vectorized.
1678+
if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
1679+
CallInst *CEI = cast<CallInst>(E->Scalars[0]);
1680+
OpVecs.push_back(CEI->getArgOperand(j));
1681+
continue;
1682+
}
16581683
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
16591684
CallInst *CEI = cast<CallInst>(E->Scalars[i]);
16601685
OpVL.push_back(CEI->getArgOperand(j));

‎llvm/test/Transforms/LoopVectorize/intrinsic.ll

+102
Original file line numberDiff line numberDiff line change
@@ -1090,3 +1090,105 @@ for.end: ; preds = %for.body
10901090
ret void
10911091
}
10921092

1093+
declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone
1094+
1095+
;CHECK-LABEL: @powi_f64(
1096+
;CHECK: llvm.powi.v4f64
1097+
;CHECK: ret void
1098+
define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
1099+
entry:
1100+
%cmp9 = icmp sgt i32 %n, 0
1101+
br i1 %cmp9, label %for.body, label %for.end
1102+
1103+
for.body: ; preds = %entry, %for.body
1104+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1105+
%arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
1106+
%0 = load double* %arrayidx, align 8
1107+
%call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone
1108+
%arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
1109+
store double %call, double* %arrayidx4, align 8
1110+
%indvars.iv.next = add i64 %indvars.iv, 1
1111+
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
1112+
%exitcond = icmp eq i32 %lftr.wideiv, %n
1113+
br i1 %exitcond, label %for.end, label %for.body
1114+
1115+
for.end: ; preds = %for.body, %entry
1116+
ret void
1117+
}
1118+
1119+
;CHECK-LABEL: @powi_f64_neg(
1120+
;CHECK-NOT: llvm.powi.v4f64
1121+
;CHECK: ret void
1122+
define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
1123+
entry:
1124+
%cmp9 = icmp sgt i32 %n, 0
1125+
br i1 %cmp9, label %for.body, label %for.end
1126+
1127+
for.body: ; preds = %entry, %for.body
1128+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1129+
%arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
1130+
%0 = load double* %arrayidx, align 8
1131+
%1 = trunc i64 %indvars.iv to i32
1132+
%call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone
1133+
%arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
1134+
store double %call, double* %arrayidx4, align 8
1135+
%indvars.iv.next = add i64 %indvars.iv, 1
1136+
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
1137+
%exitcond = icmp eq i32 %lftr.wideiv, %n
1138+
br i1 %exitcond, label %for.end, label %for.body
1139+
1140+
for.end: ; preds = %for.body, %entry
1141+
ret void
1142+
}
1143+
1144+
declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone
1145+
1146+
;CHECK-LABEL: @cttz_f64(
1147+
;CHECK: llvm.cttz.v4i64
1148+
;CHECK: ret void
1149+
define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
1150+
entry:
1151+
%cmp9 = icmp sgt i32 %n, 0
1152+
br i1 %cmp9, label %for.body, label %for.end
1153+
1154+
for.body: ; preds = %entry, %for.body
1155+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1156+
%arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
1157+
%0 = load i64* %arrayidx, align 8
1158+
%call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
1159+
%arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
1160+
store i64 %call, i64* %arrayidx4, align 8
1161+
%indvars.iv.next = add i64 %indvars.iv, 1
1162+
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
1163+
%exitcond = icmp eq i32 %lftr.wideiv, %n
1164+
br i1 %exitcond, label %for.end, label %for.body
1165+
1166+
for.end: ; preds = %for.body, %entry
1167+
ret void
1168+
}
1169+
1170+
declare i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone
1171+
1172+
;CHECK-LABEL: @ctlz_f64(
1173+
;CHECK: llvm.ctlz.v4i64
1174+
;CHECK: ret void
1175+
define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
1176+
entry:
1177+
%cmp9 = icmp sgt i32 %n, 0
1178+
br i1 %cmp9, label %for.body, label %for.end
1179+
1180+
for.body: ; preds = %entry, %for.body
1181+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1182+
%arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
1183+
%0 = load i64* %arrayidx, align 8
1184+
%call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
1185+
%arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
1186+
store i64 %call, i64* %arrayidx4, align 8
1187+
%indvars.iv.next = add i64 %indvars.iv, 1
1188+
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
1189+
%exitcond = icmp eq i32 %lftr.wideiv, %n
1190+
br i1 %exitcond, label %for.end, label %for.body
1191+
1192+
for.end: ; preds = %for.body, %entry
1193+
ret void
1194+
}

‎llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll

+267
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,270 @@ entry:
117117
; CHECK: store <4 x i32>
118118
; CHECK: ret
119119
}
120+
121+
declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
122+
123+
define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
124+
entry:
125+
%i0 = load i32* %a, align 4
126+
%i1 = load i32* %b, align 4
127+
%add1 = add i32 %i0, %i1
128+
%call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
129+
130+
%arrayidx2 = getelementptr inbounds i32* %a, i32 1
131+
%i2 = load i32* %arrayidx2, align 4
132+
%arrayidx3 = getelementptr inbounds i32* %b, i32 1
133+
%i3 = load i32* %arrayidx3, align 4
134+
%add2 = add i32 %i2, %i3
135+
%call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
136+
137+
%arrayidx4 = getelementptr inbounds i32* %a, i32 2
138+
%i4 = load i32* %arrayidx4, align 4
139+
%arrayidx5 = getelementptr inbounds i32* %b, i32 2
140+
%i5 = load i32* %arrayidx5, align 4
141+
%add3 = add i32 %i4, %i5
142+
%call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
143+
144+
%arrayidx6 = getelementptr inbounds i32* %a, i32 3
145+
%i6 = load i32* %arrayidx6, align 4
146+
%arrayidx7 = getelementptr inbounds i32* %b, i32 3
147+
%i7 = load i32* %arrayidx7, align 4
148+
%add4 = add i32 %i6, %i7
149+
%call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
150+
151+
store i32 %call1, i32* %c, align 4
152+
%arrayidx8 = getelementptr inbounds i32* %c, i32 1
153+
store i32 %call2, i32* %arrayidx8, align 4
154+
%arrayidx9 = getelementptr inbounds i32* %c, i32 2
155+
store i32 %call3, i32* %arrayidx9, align 4
156+
%arrayidx10 = getelementptr inbounds i32* %c, i32 3
157+
store i32 %call4, i32* %arrayidx10, align 4
158+
ret void
159+
160+
; CHECK-LABEL: @vec_ctlz_i32(
161+
; CHECK: load <4 x i32>
162+
; CHECK: load <4 x i32>
163+
; CHECK: call <4 x i32> @llvm.ctlz.v4i32
164+
; CHECK: store <4 x i32>
165+
; CHECK: ret
166+
}
167+
168+
define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
169+
entry:
170+
%i0 = load i32* %a, align 4
171+
%i1 = load i32* %b, align 4
172+
%add1 = add i32 %i0, %i1
173+
%call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
174+
175+
%arrayidx2 = getelementptr inbounds i32* %a, i32 1
176+
%i2 = load i32* %arrayidx2, align 4
177+
%arrayidx3 = getelementptr inbounds i32* %b, i32 1
178+
%i3 = load i32* %arrayidx3, align 4
179+
%add2 = add i32 %i2, %i3
180+
%call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
181+
182+
%arrayidx4 = getelementptr inbounds i32* %a, i32 2
183+
%i4 = load i32* %arrayidx4, align 4
184+
%arrayidx5 = getelementptr inbounds i32* %b, i32 2
185+
%i5 = load i32* %arrayidx5, align 4
186+
%add3 = add i32 %i4, %i5
187+
%call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
188+
189+
%arrayidx6 = getelementptr inbounds i32* %a, i32 3
190+
%i6 = load i32* %arrayidx6, align 4
191+
%arrayidx7 = getelementptr inbounds i32* %b, i32 3
192+
%i7 = load i32* %arrayidx7, align 4
193+
%add4 = add i32 %i6, %i7
194+
%call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
195+
196+
store i32 %call1, i32* %c, align 4
197+
%arrayidx8 = getelementptr inbounds i32* %c, i32 1
198+
store i32 %call2, i32* %arrayidx8, align 4
199+
%arrayidx9 = getelementptr inbounds i32* %c, i32 2
200+
store i32 %call3, i32* %arrayidx9, align 4
201+
%arrayidx10 = getelementptr inbounds i32* %c, i32 3
202+
store i32 %call4, i32* %arrayidx10, align 4
203+
ret void
204+
205+
; CHECK-LABEL: @vec_ctlz_i32_neg(
206+
; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
207+
208+
}
209+
210+
211+
declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
212+
213+
define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
214+
entry:
215+
%i0 = load i32* %a, align 4
216+
%i1 = load i32* %b, align 4
217+
%add1 = add i32 %i0, %i1
218+
%call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
219+
220+
%arrayidx2 = getelementptr inbounds i32* %a, i32 1
221+
%i2 = load i32* %arrayidx2, align 4
222+
%arrayidx3 = getelementptr inbounds i32* %b, i32 1
223+
%i3 = load i32* %arrayidx3, align 4
224+
%add2 = add i32 %i2, %i3
225+
%call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
226+
227+
%arrayidx4 = getelementptr inbounds i32* %a, i32 2
228+
%i4 = load i32* %arrayidx4, align 4
229+
%arrayidx5 = getelementptr inbounds i32* %b, i32 2
230+
%i5 = load i32* %arrayidx5, align 4
231+
%add3 = add i32 %i4, %i5
232+
%call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
233+
234+
%arrayidx6 = getelementptr inbounds i32* %a, i32 3
235+
%i6 = load i32* %arrayidx6, align 4
236+
%arrayidx7 = getelementptr inbounds i32* %b, i32 3
237+
%i7 = load i32* %arrayidx7, align 4
238+
%add4 = add i32 %i6, %i7
239+
%call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
240+
241+
store i32 %call1, i32* %c, align 4
242+
%arrayidx8 = getelementptr inbounds i32* %c, i32 1
243+
store i32 %call2, i32* %arrayidx8, align 4
244+
%arrayidx9 = getelementptr inbounds i32* %c, i32 2
245+
store i32 %call3, i32* %arrayidx9, align 4
246+
%arrayidx10 = getelementptr inbounds i32* %c, i32 3
247+
store i32 %call4, i32* %arrayidx10, align 4
248+
ret void
249+
250+
; CHECK-LABEL: @vec_cttz_i32(
251+
; CHECK: load <4 x i32>
252+
; CHECK: load <4 x i32>
253+
; CHECK: call <4 x i32> @llvm.cttz.v4i32
254+
; CHECK: store <4 x i32>
255+
; CHECK: ret
256+
}
257+
258+
define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
259+
entry:
260+
%i0 = load i32* %a, align 4
261+
%i1 = load i32* %b, align 4
262+
%add1 = add i32 %i0, %i1
263+
%call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
264+
265+
%arrayidx2 = getelementptr inbounds i32* %a, i32 1
266+
%i2 = load i32* %arrayidx2, align 4
267+
%arrayidx3 = getelementptr inbounds i32* %b, i32 1
268+
%i3 = load i32* %arrayidx3, align 4
269+
%add2 = add i32 %i2, %i3
270+
%call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
271+
272+
%arrayidx4 = getelementptr inbounds i32* %a, i32 2
273+
%i4 = load i32* %arrayidx4, align 4
274+
%arrayidx5 = getelementptr inbounds i32* %b, i32 2
275+
%i5 = load i32* %arrayidx5, align 4
276+
%add3 = add i32 %i4, %i5
277+
%call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
278+
279+
%arrayidx6 = getelementptr inbounds i32* %a, i32 3
280+
%i6 = load i32* %arrayidx6, align 4
281+
%arrayidx7 = getelementptr inbounds i32* %b, i32 3
282+
%i7 = load i32* %arrayidx7, align 4
283+
%add4 = add i32 %i6, %i7
284+
%call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
285+
286+
store i32 %call1, i32* %c, align 4
287+
%arrayidx8 = getelementptr inbounds i32* %c, i32 1
288+
store i32 %call2, i32* %arrayidx8, align 4
289+
%arrayidx9 = getelementptr inbounds i32* %c, i32 2
290+
store i32 %call3, i32* %arrayidx9, align 4
291+
%arrayidx10 = getelementptr inbounds i32* %c, i32 3
292+
store i32 %call4, i32* %arrayidx10, align 4
293+
ret void
294+
295+
; CHECK-LABEL: @vec_cttz_i32_neg(
296+
; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
297+
}
298+
299+
300+
declare float @llvm.powi.f32(float, i32)
301+
define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
302+
entry:
303+
%i0 = load float* %a, align 4
304+
%i1 = load float* %b, align 4
305+
%add1 = fadd float %i0, %i1
306+
%call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
307+
308+
%arrayidx2 = getelementptr inbounds float* %a, i32 1
309+
%i2 = load float* %arrayidx2, align 4
310+
%arrayidx3 = getelementptr inbounds float* %b, i32 1
311+
%i3 = load float* %arrayidx3, align 4
312+
%add2 = fadd float %i2, %i3
313+
%call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone
314+
315+
%arrayidx4 = getelementptr inbounds float* %a, i32 2
316+
%i4 = load float* %arrayidx4, align 4
317+
%arrayidx5 = getelementptr inbounds float* %b, i32 2
318+
%i5 = load float* %arrayidx5, align 4
319+
%add3 = fadd float %i4, %i5
320+
%call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
321+
322+
%arrayidx6 = getelementptr inbounds float* %a, i32 3
323+
%i6 = load float* %arrayidx6, align 4
324+
%arrayidx7 = getelementptr inbounds float* %b, i32 3
325+
%i7 = load float* %arrayidx7, align 4
326+
%add4 = fadd float %i6, %i7
327+
%call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone
328+
329+
store float %call1, float* %c, align 4
330+
%arrayidx8 = getelementptr inbounds float* %c, i32 1
331+
store float %call2, float* %arrayidx8, align 4
332+
%arrayidx9 = getelementptr inbounds float* %c, i32 2
333+
store float %call3, float* %arrayidx9, align 4
334+
%arrayidx10 = getelementptr inbounds float* %c, i32 3
335+
store float %call4, float* %arrayidx10, align 4
336+
ret void
337+
338+
; CHECK-LABEL: @vec_powi_f32(
339+
; CHECK: load <4 x float>
340+
; CHECK: load <4 x float>
341+
; CHECK: call <4 x float> @llvm.powi.v4f32
342+
; CHECK: store <4 x float>
343+
; CHECK: ret
344+
}
345+
346+
347+
define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
348+
entry:
349+
%i0 = load float* %a, align 4
350+
%i1 = load float* %b, align 4
351+
%add1 = fadd float %i0, %i1
352+
%call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
353+
354+
%arrayidx2 = getelementptr inbounds float* %a, i32 1
355+
%i2 = load float* %arrayidx2, align 4
356+
%arrayidx3 = getelementptr inbounds float* %b, i32 1
357+
%i3 = load float* %arrayidx3, align 4
358+
%add2 = fadd float %i2, %i3
359+
%call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone
360+
361+
%arrayidx4 = getelementptr inbounds float* %a, i32 2
362+
%i4 = load float* %arrayidx4, align 4
363+
%arrayidx5 = getelementptr inbounds float* %b, i32 2
364+
%i5 = load float* %arrayidx5, align 4
365+
%add3 = fadd float %i4, %i5
366+
%call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
367+
368+
%arrayidx6 = getelementptr inbounds float* %a, i32 3
369+
%i6 = load float* %arrayidx6, align 4
370+
%arrayidx7 = getelementptr inbounds float* %b, i32 3
371+
%i7 = load float* %arrayidx7, align 4
372+
%add4 = fadd float %i6, %i7
373+
%call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone
374+
375+
store float %call1, float* %c, align 4
376+
%arrayidx8 = getelementptr inbounds float* %c, i32 1
377+
store float %call2, float* %arrayidx8, align 4
378+
%arrayidx9 = getelementptr inbounds float* %c, i32 2
379+
store float %call3, float* %arrayidx9, align 4
380+
%arrayidx10 = getelementptr inbounds float* %c, i32 3
381+
store float %call4, float* %arrayidx10, align 4
382+
ret void
383+
384+
; CHECK-LABEL: @vec_powi_f32_neg(
385+
; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
386+
}

0 commit comments

Comments
 (0)
Please sign in to comment.