Skip to content

Commit 7b84fd7

Browse files
committed Sep 14, 2018
[ARM] bottom-top mul support in ARMParallelDSP
On failing to find sequences that can be converted into dual macs, try to find sequential 16-bit loads that are used by muls, for which we can then use smultb, smulbt, or smultt with a wide load. Differential Revision: https://reviews.llvm.org/D51983 llvm-svn: 342210
1 parent 3afb974 commit 7b84fd7

File tree

3 files changed

+612
-27
lines changed

3 files changed

+612
-27
lines changed
 

‎llvm/lib/Target/ARM/ARMParallelDSP.cpp

+152-27
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ namespace {
5555
using ReductionList = SmallVector<Reduction, 8>;
5656
using ValueList = SmallVector<Value*, 8>;
5757
using MemInstList = SmallVector<Instruction*, 8>;
58+
using LoadInstList = SmallVector<LoadInst*, 8>;
5859
using PMACPair = std::pair<BinOpChain*,BinOpChain*>;
5960
using PMACPairList = SmallVector<PMACPair, 8>;
6061
using Instructions = SmallVector<Instruction*,16>;
@@ -63,7 +64,8 @@ namespace {
6364
struct OpChain {
6465
Instruction *Root;
6566
ValueList AllValues;
66-
MemInstList VecLd; // List of all load instructions.
67+
MemInstList VecLd; // List of all sequential load instructions.
68+
LoadInstList Loads; // List of all load instructions.
6769
MemLocList MemLocs; // All memory locations read by this tree.
6870
bool ReadOnly = true;
6971

@@ -76,8 +78,10 @@ namespace {
7678
if (auto *I = dyn_cast<Instruction>(V)) {
7779
if (I->mayWriteToMemory())
7880
ReadOnly = false;
79-
if (auto *Ld = dyn_cast<LoadInst>(V))
81+
if (auto *Ld = dyn_cast<LoadInst>(V)) {
8082
MemLocs.push_back(MemoryLocation(Ld->getPointerOperand(), Size));
83+
Loads.push_back(Ld);
84+
}
8185
}
8286
}
8387
}
@@ -135,6 +139,7 @@ namespace {
135139
/// exchange the halfwords of the second operand before performing the
136140
/// arithmetic.
137141
bool MatchSMLAD(Function &F);
142+
bool MatchTopBottomMuls(BasicBlock *LoopBody);
138143

139144
public:
140145
static char ID;
@@ -203,6 +208,8 @@ namespace {
203208
LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n");
204209
LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n");
205210
Changes = MatchSMLAD(F);
211+
if (!Changes)
212+
Changes = MatchTopBottomMuls(Header);
206213
return Changes;
207214
}
208215
};
@@ -496,10 +503,10 @@ static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
496503
);
497504
}
498505

499-
static void AddMACCandidate(OpChainList &Candidates,
506+
static void AddMulCandidate(OpChainList &Candidates,
500507
Instruction *Mul,
501508
Value *MulOp0, Value *MulOp1) {
502-
LLVM_DEBUG(dbgs() << "OK, found acc mul:\t"; Mul->dump());
509+
LLVM_DEBUG(dbgs() << "OK, found mul:\t"; Mul->dump());
503510
assert(Mul->getOpcode() == Instruction::Mul &&
504511
"expected mul instruction");
505512
ValueList LHS;
@@ -533,14 +540,14 @@ static void MatchParallelMACSequences(Reduction &R,
533540
break;
534541
case Instruction::Mul:
535542
if (match (I, (m_Mul(m_Value(MulOp0), m_Value(MulOp1))))) {
536-
AddMACCandidate(Candidates, I, MulOp0, MulOp1);
543+
AddMulCandidate(Candidates, I, MulOp0, MulOp1);
537544
return false;
538545
}
539546
break;
540547
case Instruction::SExt:
541548
if (match (I, (m_SExt(m_Mul(m_Value(MulOp0), m_Value(MulOp1)))))) {
542549
Instruction *Mul = cast<Instruction>(I->getOperand(0));
543-
AddMACCandidate(Candidates, Mul, MulOp0, MulOp1);
550+
AddMulCandidate(Candidates, Mul, MulOp0, MulOp1);
544551
return false;
545552
}
546553
break;
@@ -569,23 +576,24 @@ static void AliasCandidates(BasicBlock *Header, Instructions &Reads,
569576
// the memory locations accessed by the MAC-chains.
570577
// TODO: we need the read statements when we accept more complicated chains.
571578
static bool AreAliased(AliasAnalysis *AA, Instructions &Reads,
572-
Instructions &Writes, OpChainList &MACCandidates) {
579+
Instructions &Writes, OpChainList &Candidates) {
573580
LLVM_DEBUG(dbgs() << "Alias checks:\n");
574-
for (auto &MAC : MACCandidates) {
575-
LLVM_DEBUG(dbgs() << "mul: "; MAC->Root->dump());
581+
for (auto &Candidate : Candidates) {
582+
LLVM_DEBUG(dbgs() << "mul: "; Candidate->Root->dump());
583+
Candidate->SetMemoryLocations();
576584

577585
// At the moment, we allow only simple chains that only consist of reads,
578586
// accumulate their result with an integer add, and thus that don't write
579587
// memory, and simply bail if they do.
580-
if (!MAC->ReadOnly)
588+
if (!Candidate->ReadOnly)
581589
return true;
582590

583591
// Now for all writes in the basic block, check that they don't alias with
584592
// the memory locations accessed by our MAC-chain:
585593
for (auto *I : Writes) {
586594
LLVM_DEBUG(dbgs() << "- "; I->dump());
587-
assert(MAC->MemLocs.size() >= 2 && "expecting at least 2 memlocs");
588-
for (auto &MemLoc : MAC->MemLocs) {
595+
assert(Candidate->MemLocs.size() >= 2 && "expecting at least 2 memlocs");
596+
for (auto &MemLoc : Candidate->MemLocs) {
589597
if (isModOrRefSet(intersectModRef(AA->getModRefInfo(I, MemLoc),
590598
ModRefInfo::ModRef))) {
591599
LLVM_DEBUG(dbgs() << "Yes, aliases found\n");
@@ -599,15 +607,14 @@ static bool AreAliased(AliasAnalysis *AA, Instructions &Reads,
599607
return false;
600608
}
601609

602-
static bool CheckMACMemory(OpChainList &Candidates) {
610+
static bool CheckMulMemory(OpChainList &Candidates) {
603611
for (auto &C : Candidates) {
604612
// A mul has 2 operands, and a narrow op consist of sext and a load; thus
605613
// we expect at least 4 items in this operand value list.
606614
if (C->size() < 4) {
607615
LLVM_DEBUG(dbgs() << "Operand list too short.\n");
608616
return false;
609617
}
610-
C->SetMemoryLocations();
611618
ValueList &LHS = static_cast<BinOpChain*>(C.get())->LHS;
612619
ValueList &RHS = static_cast<BinOpChain*>(C.get())->RHS;
613620

@@ -620,6 +627,131 @@ static bool CheckMACMemory(OpChainList &Candidates) {
620627
return true;
621628
}
622629

630+
// Create a single wide load (of type LoadTy, e.g. i32) that covers a pair of
// sequential narrow loads: the pointer operand of BaseLoad (the
// lower-addressed load of the pair) is bitcast to LoadTy* and reloaded.
// The caller is expected to have positioned IRB's insert point.
static LoadInst *CreateLoadIns(IRBuilder<NoFolder> &IRB, LoadInst *BaseLoad,
631+
const Type *LoadTy) {
632+
const unsigned AddrSpace = BaseLoad->getPointerAddressSpace();
633+
634+
Value *VecPtr = IRB.CreateBitCast(BaseLoad->getPointerOperand(),
635+
LoadTy->getPointerTo(AddrSpace));
636+
// Deliberately keep the narrow load's alignment: the wide load may only be
// 2-byte aligned even though it is wider than the original access.
return IRB.CreateAlignedLoad(VecPtr, BaseLoad->getAlignment());
637+
}
638+
639+
/// Attempt to widen loads and use smulbb, smulbt, smultb and smultt muls.
640+
// TODO: This, like smlad generation, expects the leaf operands to be loads
641+
// that are sign extended. We should be able to handle scalar values as well
642+
// performing these muls on word x half types to generate smulwb and smulwt.
643+
// Scan LoopBody for 32/64-bit muls fed by sign-extended sequential i16
// loads; pair the loads into one wide load and rewrite each mul operand as
// (ashr (shl Wide, 16), 16) for the bottom half and (ashr Wide, 16) for the
// top half, so isel can select the smul[b|t][b|t] instructions.
// Returns true if any replacement was made.
bool ARMParallelDSP::MatchTopBottomMuls(BasicBlock *LoopBody) {
644+
LLVM_DEBUG(dbgs() << "Attempting to find BT|TB muls.\n");
645+
646+
// Collect every mul in the block whose result is 32 or 64 bits wide; the
// operand chains (including their loads) are gathered by AddMulCandidate.
OpChainList Candidates;
647+
for (auto &I : *LoopBody) {
648+
if (I.getOpcode() == Instruction::Mul) {
649+
if (I.getType()->getScalarSizeInBits() == 32 ||
650+
I.getType()->getScalarSizeInBits() == 64)
651+
AddMulCandidate(Candidates, &I, I.getOperand(0), I.getOperand(1));
652+
}
653+
}
654+
655+
if (Candidates.empty())
656+
return false;
657+
658+
// Bail if any store in the block may alias the candidates' loads — the
// widened load must read exactly what the two narrow loads read.
Instructions Reads;
659+
Instructions Writes;
660+
AliasCandidates(LoopBody, Reads, Writes);
661+
662+
if (AreAliased(AA, Reads, Writes, Candidates))
663+
return false;
664+
665+
// SeqLoads maps a base (lower-address) load to the load at +2 bytes;
// OffsetLoads marks loads already claimed as the upper half; LoadUsers
// records the mul (chain root) that consumes each load.
DenseMap<LoadInst*, Instruction*> LoadUsers;
666+
DenseMap<LoadInst*, LoadInst*> SeqLoads;
667+
SmallPtrSet<LoadInst*, 8> OffsetLoads;
668+
669+
// Pair loads across two *different* mul chains: Ld0 from chain i becomes
// the base and Ld1 from chain j the offset load when they are sequential
// in memory. O(candidates^2 * loads^2) — acceptable for small loop bodies.
for (unsigned i = 0; i < Candidates.size(); ++i) {
670+
for (unsigned j = 0; j < Candidates.size(); ++j) {
671+
if (i == j)
672+
continue;
673+
674+
OpChain *MulChain0 = Candidates[i].get();
675+
OpChain *MulChain1 = Candidates[j].get();
676+
677+
for (auto *Ld0 : MulChain0->Loads) {
678+
// Skip loads already consumed by an earlier pairing.
if (SeqLoads.count(Ld0) || OffsetLoads.count(Ld0))
679+
continue;
680+
681+
for (auto *Ld1 : MulChain1->Loads) {
682+
if (SeqLoads.count(Ld1) || OffsetLoads.count(Ld1))
683+
continue;
684+
685+
MemInstList VecMem;
686+
if (AreSequentialLoads(Ld0, Ld1, VecMem)) {
687+
SeqLoads[Ld0] = Ld1;
688+
OffsetLoads.insert(Ld1);
689+
LoadUsers[Ld0] = MulChain0->Root;
690+
LoadUsers[Ld1] = MulChain1->Root;
691+
}
692+
}
693+
}
694+
}
695+
}
696+
697+
if (SeqLoads.empty())
698+
return false;
699+
700+
IRBuilder<NoFolder> IRB(LoopBody);
701+
// All wide loads are created as i32 (two adjacent i16 halves).
const Type *Ty = IntegerType::get(M->getContext(), 32);
702+
703+
// We know that at least one of the operands is a SExt of Ld.
704+
// Find the sext of Ld among I's operands, looking at operand OpIdx first.
// NOTE(review): when operand OpIdx's first operand matches Ld, this returns
// I->getOperand(0) rather than I->getOperand(OpIdx); for OpIdx == 1 that
// looks like it returns the wrong operand — confirm against the callers
// below, which only try OpIdx 1 after OpIdx 0 returned nullptr.
auto GetSExt = [](Instruction *I, LoadInst *Ld, unsigned OpIdx) -> Instruction* {
705+
if (!isa<Instruction>(I->getOperand(OpIdx)))
706+
return nullptr;
707+
708+
Value *SExt = nullptr;
709+
if (cast<Instruction>(I->getOperand(OpIdx))->getOperand(0) == Ld)
710+
SExt = I->getOperand(0);
711+
else
712+
SExt = I->getOperand(1);
713+
714+
return cast<Instruction>(SExt);
715+
};
716+
717+
LLVM_DEBUG(dbgs() << "Found some sequential loads, now widening:\n");
718+
for (auto &Pair : SeqLoads) {
719+
LoadInst *BaseLd = Pair.first;
720+
LoadInst *OffsetLd = Pair.second;
721+
// Insert the wide load where the base load was, so it dominates both of
// the muls that will use its halves.
IRB.SetInsertPoint(BaseLd);
722+
LoadInst *WideLd = CreateLoadIns(IRB, BaseLd, Ty);
723+
LLVM_DEBUG(dbgs() << " - with base load: " << *BaseLd << "\n");
724+
LLVM_DEBUG(dbgs() << " - created wide load: " << *WideLd << "\n");
725+
Instruction *BaseUser = LoadUsers[BaseLd];
726+
Instruction *OffsetUser = LoadUsers[OffsetLd];
727+
728+
Instruction *BaseSExt = GetSExt(BaseUser, BaseLd, 0);
729+
if (!BaseSExt)
730+
BaseSExt = GetSExt(BaseUser, BaseLd, 1);
731+
Instruction *OffsetSExt = GetSExt(OffsetUser, OffsetLd, 0);
732+
if (!OffsetSExt)
733+
OffsetSExt = GetSExt(OffsetUser, OffsetLd, 1);
734+
735+
assert((BaseSExt && OffsetSExt) && "failed to find SExts");
736+
737+
// BaseUser needs to: (asr (shl WideLoad, 16), 16)
738+
// OffsetUser needs to: (asr WideLoad, 16)
739+
auto *Shl = cast<Instruction>(IRB.CreateShl(WideLd, 16));
740+
auto *Bottom = cast<Instruction>(IRB.CreateAShr(Shl, 16));
741+
auto *Top = cast<Instruction>(IRB.CreateAShr(WideLd, 16));
742+
BaseUser->replaceUsesOfWith(BaseSExt, Bottom);
743+
OffsetUser->replaceUsesOfWith(OffsetSExt, Top);
744+
745+
// NOTE(review): assumes the sexts and narrow loads have no remaining
// users after the replacement above — confirm, since eraseFromParent
// on an instruction that still has uses is invalid.
BaseSExt->eraseFromParent();
746+
OffsetSExt->eraseFromParent();
747+
BaseLd->eraseFromParent();
748+
OffsetLd->eraseFromParent();
749+
}
750+
LLVM_DEBUG(dbgs() << "Block after top bottom mul replacements:\n"
751+
<< *LoopBody << "\n");
752+
return true;
753+
}
754+
623755
// Loop Pass that needs to identify integer add/sub reductions of 16-bit vector
624756
// multiplications.
625757
// To use SMLAD:
@@ -658,14 +790,15 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) {
658790
dbgs() << "Header block:\n"; Header->dump();
659791
dbgs() << "Loop info:\n\n"; L->dump());
660792

661-
bool Changed = false;
662793
ReductionList Reductions;
663794
MatchReductions(F, L, Header, Reductions);
795+
if (Reductions.empty())
796+
return false;
664797

665798
for (auto &R : Reductions) {
666799
OpChainList MACCandidates;
667800
MatchParallelMACSequences(R, MACCandidates);
668-
if (!CheckMACMemory(MACCandidates))
801+
if (!CheckMulMemory(MACCandidates))
669802
continue;
670803

671804
R.MACCandidates = std::move(MACCandidates);
@@ -682,6 +815,7 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) {
682815
Instructions Reads, Writes;
683816
AliasCandidates(Header, Reads, Writes);
684817

818+
bool Changed = false;
685819
for (auto &R : Reductions) {
686820
if (AreAliased(AA, Reads, Writes, R.MACCandidates))
687821
return false;
@@ -693,15 +827,6 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) {
693827
return Changed;
694828
}
695829

696-
static LoadInst *CreateLoadIns(IRBuilder<NoFolder> &IRB, LoadInst &BaseLoad,
697-
const Type *LoadTy) {
698-
const unsigned AddrSpace = BaseLoad.getPointerAddressSpace();
699-
700-
Value *VecPtr = IRB.CreateBitCast(BaseLoad.getPointerOperand(),
701-
LoadTy->getPointerTo(AddrSpace));
702-
return IRB.CreateAlignedLoad(VecPtr, BaseLoad.getAlignment());
703-
}
704-
705830
Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
706831
Instruction *Acc, bool Exchange,
707832
Instruction *InsertAfter) {
@@ -716,8 +841,8 @@ Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
716841

717842
// Replace the reduction chain with an intrinsic call
718843
const Type *Ty = IntegerType::get(M->getContext(), 32);
719-
LoadInst *NewLd0 = CreateLoadIns(Builder, VecLd0[0], Ty);
720-
LoadInst *NewLd1 = CreateLoadIns(Builder, VecLd1[0], Ty);
844+
LoadInst *NewLd0 = CreateLoadIns(Builder, &VecLd0[0], Ty);
845+
LoadInst *NewLd1 = CreateLoadIns(Builder, &VecLd1[0], Ty);
721846
Value* Args[] = { NewLd0, NewLd1, Acc };
722847
Function *SMLAD = nullptr;
723848
if (Exchange)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
2+
3+
; CHECK-LABEL: topbottom_mul_alias
4+
; CHECK-NOT: bitcast i16*
5+
define void @topbottom_mul_alias(i32 %N, i32* nocapture readnone %Out, i16* nocapture readonly %In1, i16* nocapture readonly %In2) {
6+
entry:
7+
br label %for.body
8+
9+
for.body:
10+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
11+
%count = phi i32 [ %N, %entry ], [ %count.next, %for.body ]
12+
%PIn1.0 = getelementptr inbounds i16, i16* %In1, i32 %iv
13+
%In1.0 = load i16, i16* %PIn1.0, align 2
14+
%SIn1.0 = sext i16 %In1.0 to i32
15+
%PIn2.0 = getelementptr inbounds i16, i16* %In2, i32 %iv
16+
%In2.0 = load i16, i16* %PIn2.0, align 2
17+
%SIn2.0 = sext i16 %In2.0 to i32
18+
%mul5.us.i.i = mul nsw i32 %SIn1.0, %SIn2.0
19+
%Out.0 = getelementptr inbounds i32, i32* %Out, i32 %iv
20+
store i32 %mul5.us.i.i, i32* %Out.0, align 4
21+
%iv.1 = or i32 %iv, 1
22+
%PIn1.1 = getelementptr inbounds i16, i16* %In1, i32 %iv.1
23+
%In1.1 = load i16, i16* %PIn1.1, align 2
24+
%SIn1.1 = sext i16 %In1.1 to i32
25+
%PIn2.1 = getelementptr inbounds i16, i16* %In2, i32 %iv.1
26+
%In2.1 = load i16, i16* %PIn2.1, align 2
27+
%SIn2.1 = sext i16 %In2.1 to i32
28+
%mul5.us.i.1.i = mul nsw i32 %SIn1.1, %SIn2.1
29+
%Out.1 = getelementptr inbounds i32, i32* %Out, i32 %iv.1
30+
store i32 %mul5.us.i.1.i, i32* %Out.1, align 4
31+
%iv.2 = or i32 %iv, 2
32+
%PIn1.2 = getelementptr inbounds i16, i16* %In1, i32 %iv.2
33+
%In1.2 = load i16, i16* %PIn1.2, align 2
34+
%SIn1.2 = sext i16 %In1.2 to i32
35+
%PIn2.2 = getelementptr inbounds i16, i16* %In2, i32 %iv.2
36+
%In2.2 = load i16, i16* %PIn2.2, align 2
37+
%SIn2.2 = sext i16 %In2.2 to i32
38+
%mul5.us.i.2.i = mul nsw i32 %SIn1.2, %SIn2.2
39+
%Out.2 = getelementptr inbounds i32, i32* %Out, i32 %iv.2
40+
store i32 %mul5.us.i.2.i, i32* %Out.2, align 4
41+
%iv.3 = or i32 %iv, 3
42+
%PIn1.3 = getelementptr inbounds i16, i16* %In1, i32 %iv.3
43+
%In1.3 = load i16, i16* %PIn1.3, align 2
44+
%SIn1.3 = sext i16 %In1.3 to i32
45+
%PIn2.3 = getelementptr inbounds i16, i16* %In2, i32 %iv.3
46+
%In2.3 = load i16, i16* %PIn2.3, align 2
47+
%SIn2.3 = sext i16 %In2.3 to i32
48+
%mul5.us.i.3.i = mul nsw i32 %SIn1.3, %SIn2.3
49+
%Out.3 = getelementptr inbounds i32, i32* %Out, i32 %iv.3
50+
store i32 %mul5.us.i.3.i, i32* %Out.3, align 4
51+
%iv.next = add i32 %iv, 4
52+
%count.next = add i32 %count, -4
53+
%niter375.ncmp.3.i = icmp eq i32 %count.next, 0
54+
br i1 %niter375.ncmp.3.i, label %exit, label %for.body
55+
56+
exit:
57+
ret void
58+
}
59+
60+
; TODO: We should be able to handle this by splatting the const value.
61+
; CHECK-LABEL: topbottom_mul_const
62+
; CHECK-NOT: bitcast i16*
63+
define void @topbottom_mul_const(i32 %N, i32* noalias nocapture readnone %Out, i16* nocapture readonly %In, i16 signext %const) {
64+
entry:
65+
%conv4.i.i = sext i16 %const to i32
66+
br label %for.body
67+
68+
for.body:
69+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
70+
%count = phi i32 [ %N, %entry ], [ %count.next, %for.body ]
71+
%PIn.0 = getelementptr inbounds i16, i16* %In, i32 %iv
72+
%In.0 = load i16, i16* %PIn.0, align 2
73+
%conv.us.i144.i = sext i16 %In.0 to i32
74+
%mul5.us.i.i = mul nsw i32 %conv.us.i144.i, %conv4.i.i
75+
%Out.0 = getelementptr inbounds i32, i32* %Out, i32 %iv
76+
store i32 %mul5.us.i.i, i32* %Out.0, align 4
77+
%iv.1 = or i32 %iv, 1
78+
%PIn.1 = getelementptr inbounds i16, i16* %In, i32 %iv.1
79+
%In.1 = load i16, i16* %PIn.1, align 2
80+
%conv.us.i144.1.i = sext i16 %In.1 to i32
81+
%mul5.us.i.1.i = mul nsw i32 %conv.us.i144.1.i, %conv4.i.i
82+
%Out.1 = getelementptr inbounds i32, i32* %Out, i32 %iv.1
83+
store i32 %mul5.us.i.1.i, i32* %Out.1, align 4
84+
%iv.2 = or i32 %iv, 2
85+
%PIn.2 = getelementptr inbounds i16, i16* %In, i32 %iv.2
86+
%In.3 = load i16, i16* %PIn.2, align 2
87+
%conv.us.i144.2.i = sext i16 %In.3 to i32
88+
%mul5.us.i.2.i = mul nsw i32 %conv.us.i144.2.i, %conv4.i.i
89+
%Out.2 = getelementptr inbounds i32, i32* %Out, i32 %iv.2
90+
store i32 %mul5.us.i.2.i, i32* %Out.2, align 4
91+
%iv.3 = or i32 %iv, 3
92+
%PIn.3 = getelementptr inbounds i16, i16* %In, i32 %iv.3
93+
%In.4 = load i16, i16* %PIn.3, align 2
94+
%conv.us.i144.3.i = sext i16 %In.4 to i32
95+
%mul5.us.i.3.i = mul nsw i32 %conv.us.i144.3.i, %conv4.i.i
96+
%Out.3 = getelementptr inbounds i32, i32* %Out, i32 %iv.3
97+
store i32 %mul5.us.i.3.i, i32* %Out.3, align 4
98+
%iv.next = add i32 %iv, 4
99+
%count.next = add i32 %count, -4
100+
%niter375.ncmp.3.i = icmp eq i32 %count.next, 0
101+
br i1 %niter375.ncmp.3.i, label %exit, label %for.body
102+
103+
exit:
104+
ret void
105+
}
106+
107+
; TODO: We should be able to handle this and use smulwt and smulwb.
108+
; CHECK-LABEL: topbottom_mul_word_load_const
109+
; CHECK-NOT: bitcast i16*
110+
define void @topbottom_mul_word_load_const(i32 %N, i32* noalias nocapture readnone %Out, i16* nocapture readonly %In, i32* %C) {
111+
entry:
112+
%const = load i32, i32* %C
113+
br label %for.body
114+
115+
for.body:
116+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
117+
%count = phi i32 [ %N, %entry ], [ %count.next, %for.body ]
118+
%PIn.0 = getelementptr inbounds i16, i16* %In, i32 %iv
119+
%In.0 = load i16, i16* %PIn.0, align 2
120+
%conv.us.i144.i = sext i16 %In.0 to i32
121+
%mul5.us.i.i = mul nsw i32 %conv.us.i144.i, %const
122+
%Out.0 = getelementptr inbounds i32, i32* %Out, i32 %iv
123+
store i32 %mul5.us.i.i, i32* %Out.0, align 4
124+
%iv.1 = or i32 %iv, 1
125+
%PIn.1 = getelementptr inbounds i16, i16* %In, i32 %iv.1
126+
%In.1 = load i16, i16* %PIn.1, align 2
127+
%conv.us.i144.1.i = sext i16 %In.1 to i32
128+
%mul5.us.i.1.i = mul nsw i32 %conv.us.i144.1.i, %const
129+
%Out.1 = getelementptr inbounds i32, i32* %Out, i32 %iv.1
130+
store i32 %mul5.us.i.1.i, i32* %Out.1, align 4
131+
%iv.2 = or i32 %iv, 2
132+
%PIn.2 = getelementptr inbounds i16, i16* %In, i32 %iv.2
133+
%In.3 = load i16, i16* %PIn.2, align 2
134+
%conv.us.i144.2.i = sext i16 %In.3 to i32
135+
%mul5.us.i.2.i = mul nsw i32 %conv.us.i144.2.i, %const
136+
%Out.2 = getelementptr inbounds i32, i32* %Out, i32 %iv.2
137+
store i32 %mul5.us.i.2.i, i32* %Out.2, align 4
138+
%iv.3 = or i32 %iv, 3
139+
%PIn.3 = getelementptr inbounds i16, i16* %In, i32 %iv.3
140+
%In.4 = load i16, i16* %PIn.3, align 2
141+
%conv.us.i144.3.i = sext i16 %In.4 to i32
142+
%mul5.us.i.3.i = mul nsw i32 %conv.us.i144.3.i, %const
143+
%Out.3 = getelementptr inbounds i32, i32* %Out, i32 %iv.3
144+
store i32 %mul5.us.i.3.i, i32* %Out.3, align 4
145+
%iv.next = add i32 %iv, 4
146+
%count.next = add i32 %count, -4
147+
%niter375.ncmp.3.i = icmp eq i32 %count.next, 0
148+
br i1 %niter375.ncmp.3.i, label %exit, label %for.body
149+
150+
exit:
151+
ret void
152+
}
153+
154+
; CHECK-LABEL: topbottom_mul_8
155+
; CHECK-NOT: bitcast i16*
156+
define void @topbottom_mul_8(i32 %N, i32* noalias nocapture readnone %Out, i8* nocapture readonly %In1, i8* nocapture readonly %In2) {
157+
entry:
158+
br label %for.body
159+
160+
for.body:
161+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
162+
%count = phi i32 [ %N, %entry ], [ %count.next, %for.body ]
163+
%PIn1.0 = getelementptr inbounds i8, i8* %In1, i32 %iv
164+
%In1.0 = load i8, i8* %PIn1.0, align 1
165+
%SIn1.0 = sext i8 %In1.0 to i32
166+
%PIn2.0 = getelementptr inbounds i8, i8* %In2, i32 %iv
167+
%In2.0 = load i8, i8* %PIn2.0, align 1
168+
%SIn2.0 = sext i8 %In2.0 to i32
169+
%mul5.us.i.i = mul nsw i32 %SIn1.0, %SIn2.0
170+
%Out.0 = getelementptr inbounds i32, i32* %Out, i32 %iv
171+
store i32 %mul5.us.i.i, i32* %Out.0, align 4
172+
%iv.1 = or i32 %iv, 1
173+
%PIn1.1 = getelementptr inbounds i8, i8* %In1, i32 %iv.1
174+
%In1.1 = load i8, i8* %PIn1.1, align 1
175+
%SIn1.1 = sext i8 %In1.1 to i32
176+
%PIn2.1 = getelementptr inbounds i8, i8* %In2, i32 %iv.1
177+
%In2.1 = load i8, i8* %PIn2.1, align 1
178+
%SIn2.1 = sext i8 %In2.1 to i32
179+
%mul5.us.i.1.i = mul nsw i32 %SIn1.1, %SIn2.1
180+
%Out.1 = getelementptr inbounds i32, i32* %Out, i32 %iv.1
181+
store i32 %mul5.us.i.1.i, i32* %Out.1, align 4
182+
%iv.2 = or i32 %iv, 2
183+
%PIn1.2 = getelementptr inbounds i8, i8* %In1, i32 %iv.2
184+
%In1.2 = load i8, i8* %PIn1.2, align 1
185+
%SIn1.2 = sext i8 %In1.2 to i32
186+
%PIn2.2 = getelementptr inbounds i8, i8* %In2, i32 %iv.2
187+
%In2.2 = load i8, i8* %PIn2.2, align 1
188+
%SIn2.2 = sext i8 %In2.2 to i32
189+
%mul5.us.i.2.i = mul nsw i32 %SIn1.2, %SIn2.2
190+
%Out.2 = getelementptr inbounds i32, i32* %Out, i32 %iv.2
191+
store i32 %mul5.us.i.2.i, i32* %Out.2, align 4
192+
%iv.3 = or i32 %iv, 3
193+
%PIn1.3 = getelementptr inbounds i8, i8* %In1, i32 %iv.3
194+
%In1.3 = load i8, i8* %PIn1.3, align 1
195+
%SIn1.3 = sext i8 %In1.3 to i32
196+
%PIn2.3 = getelementptr inbounds i8, i8* %In2, i32 %iv.3
197+
%In2.3 = load i8, i8* %PIn2.3, align 1
198+
%SIn2.3 = sext i8 %In2.3 to i32
199+
%mul5.us.i.3.i = mul nsw i32 %SIn1.3, %SIn2.3
200+
%Out.3 = getelementptr inbounds i32, i32* %Out, i32 %iv.3
201+
store i32 %mul5.us.i.3.i, i32* %Out.3, align 4
202+
%iv.next = add i32 %iv, 4
203+
%count.next = add i32 %count, -4
204+
%niter375.ncmp.3.i = icmp eq i32 %count.next, 0
205+
br i1 %niter375.ncmp.3.i, label %exit, label %for.body
206+
207+
exit:
208+
ret void
209+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
2+
3+
; CHECK-LABEL: topbottom_mul
4+
define void @topbottom_mul(i32 %N, i32* noalias nocapture readnone %Out, i16* nocapture readonly %In1, i16* nocapture readonly %In2) {
5+
entry:
6+
br label %for.body
7+
8+
; CHECK: for.body:
9+
; CHECK: [[Cast_PIn1_0:%[^ ]+]] = bitcast i16* %PIn1.0 to i32*
10+
; CHECK: [[PIn1_01:%[^ ]+]] = load i32, i32* [[Cast_PIn1_0]], align 2
11+
; CHECK: [[PIn1_01_shl:%[^ ]+]] = shl i32 [[PIn1_01]], 16
12+
; CHECK: [[PIn1_0:%[^ ]+]] = ashr i32 [[PIn1_01_shl]], 16
13+
; CHECK: [[PIn1_1:%[^ ]+]] = ashr i32 [[PIn1_01]], 16
14+
15+
; CHECK: [[Cast_PIn2_0:%[^ ]+]] = bitcast i16* %PIn2.0 to i32*
16+
; CHECK: [[PIn2_01:%[^ ]+]] = load i32, i32* [[Cast_PIn2_0]], align 2
17+
; CHECK: [[PIn2_01_shl:%[^ ]+]] = shl i32 [[PIn2_01]], 16
18+
; CHECK: [[PIn2_0:%[^ ]+]] = ashr i32 [[PIn2_01_shl]], 16
19+
; CHECK: [[PIn2_1:%[^ ]+]] = ashr i32 [[PIn2_01]], 16
20+
21+
; CHECK: mul nsw i32 [[PIn1_0]], [[PIn2_0]]
22+
; CHECK: mul nsw i32 [[PIn1_1]], [[PIn2_1]]
23+
24+
; CHECK: [[Cast_PIn1_2:%[^ ]+]] = bitcast i16* %PIn1.2 to i32*
25+
; CHECK: [[PIn1_23:%[^ ]+]] = load i32, i32* [[Cast_PIn1_2]], align 2
26+
; CHECK: [[PIn1_23_shl:%[^ ]+]] = shl i32 [[PIn1_23]], 16
27+
; CHECK: [[PIn1_2:%[^ ]+]] = ashr i32 [[PIn1_23_shl]], 16
28+
; CHECK: [[PIn1_3:%[^ ]+]] = ashr i32 [[PIn1_23]], 16
29+
30+
; CHECK: [[Cast_PIn2_2:%[^ ]+]] = bitcast i16* %PIn2.2 to i32*
31+
; CHECK: [[PIn2_23:%[^ ]+]] = load i32, i32* [[Cast_PIn2_2]], align 2
32+
; CHECK: [[PIn2_23_shl:%[^ ]+]] = shl i32 [[PIn2_23]], 16
33+
; CHECK: [[PIn2_2:%[^ ]+]] = ashr i32 [[PIn2_23_shl]], 16
34+
; CHECK: [[PIn2_3:%[^ ]+]] = ashr i32 [[PIn2_23]], 16
35+
36+
; CHECK: mul nsw i32 [[PIn1_2]], [[PIn2_2]]
37+
; CHECK: mul nsw i32 [[PIn1_3]], [[PIn2_3]]
38+
39+
for.body:
40+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
41+
%count = phi i32 [ %N, %entry ], [ %count.next, %for.body ]
42+
%PIn1.0 = getelementptr inbounds i16, i16* %In1, i32 %iv
43+
%In1.0 = load i16, i16* %PIn1.0, align 2
44+
%SIn1.0 = sext i16 %In1.0 to i32
45+
%PIn2.0 = getelementptr inbounds i16, i16* %In2, i32 %iv
46+
%In2.0 = load i16, i16* %PIn2.0, align 2
47+
%SIn2.0 = sext i16 %In2.0 to i32
48+
%mul5.us.i.i = mul nsw i32 %SIn1.0, %SIn2.0
49+
%Out.0 = getelementptr inbounds i32, i32* %Out, i32 %iv
50+
store i32 %mul5.us.i.i, i32* %Out.0, align 4
51+
%iv.1 = or i32 %iv, 1
52+
%PIn1.1 = getelementptr inbounds i16, i16* %In1, i32 %iv.1
53+
%In1.1 = load i16, i16* %PIn1.1, align 2
54+
%SIn1.1 = sext i16 %In1.1 to i32
55+
%PIn2.1 = getelementptr inbounds i16, i16* %In2, i32 %iv.1
56+
%In2.1 = load i16, i16* %PIn2.1, align 2
57+
%SIn2.1 = sext i16 %In2.1 to i32
58+
%mul5.us.i.1.i = mul nsw i32 %SIn1.1, %SIn2.1
59+
%Out.1 = getelementptr inbounds i32, i32* %Out, i32 %iv.1
60+
store i32 %mul5.us.i.1.i, i32* %Out.1, align 4
61+
%iv.2 = or i32 %iv, 2
62+
%PIn1.2 = getelementptr inbounds i16, i16* %In1, i32 %iv.2
63+
%In1.2 = load i16, i16* %PIn1.2, align 2
64+
%SIn1.2 = sext i16 %In1.2 to i32
65+
%PIn2.2 = getelementptr inbounds i16, i16* %In2, i32 %iv.2
66+
%In2.2 = load i16, i16* %PIn2.2, align 2
67+
%SIn2.2 = sext i16 %In2.2 to i32
68+
%mul5.us.i.2.i = mul nsw i32 %SIn1.2, %SIn2.2
69+
%Out.2 = getelementptr inbounds i32, i32* %Out, i32 %iv.2
70+
store i32 %mul5.us.i.2.i, i32* %Out.2, align 4
71+
%iv.3 = or i32 %iv, 3
72+
%PIn1.3 = getelementptr inbounds i16, i16* %In1, i32 %iv.3
73+
%In1.3 = load i16, i16* %PIn1.3, align 2
74+
%SIn1.3 = sext i16 %In1.3 to i32
75+
%PIn2.3 = getelementptr inbounds i16, i16* %In2, i32 %iv.3
76+
%In2.3 = load i16, i16* %PIn2.3, align 2
77+
%SIn2.3 = sext i16 %In2.3 to i32
78+
%mul5.us.i.3.i = mul nsw i32 %SIn1.3, %SIn2.3
79+
%Out.3 = getelementptr inbounds i32, i32* %Out, i32 %iv.3
80+
store i32 %mul5.us.i.3.i, i32* %Out.3, align 4
81+
%iv.next = add i32 %iv, 4
82+
%count.next = add i32 %count, -4
83+
%niter375.ncmp.3.i = icmp eq i32 %count.next, 0
84+
br i1 %niter375.ncmp.3.i, label %exit, label %for.body
85+
86+
exit:
87+
ret void
88+
}
89+
90+
; CHECK-LABEL: topbottom_mul_load_const
91+
define void @topbottom_mul_load_const(i32 %N, i32* noalias nocapture readnone %Out, i16* nocapture readonly %In, i16* %C) {
92+
entry:
93+
%const = load i16, i16* %C
94+
%conv4.i.i = sext i16 %const to i32
95+
br label %for.body
96+
97+
; CHECK: for.body:
98+
; CHECK: [[Cast_PIn_0:%[^ ]+]] = bitcast i16* %PIn.0 to i32*
99+
; CHECK: [[PIn_01:%[^ ]+]] = load i32, i32* [[Cast_PIn_0]], align 2
100+
; CHECK: [[PIn_01_shl:%[^ ]+]] = shl i32 [[PIn_01]], 16
101+
; CHECK: [[PIn_0:%[^ ]+]] = ashr i32 [[PIn_01_shl]], 16
102+
; CHECK: [[PIn_1:%[^ ]+]] = ashr i32 [[PIn_01]], 16
103+
104+
; CHECK: mul nsw i32 [[PIn_0]], %conv4.i.i
105+
; CHECK: mul nsw i32 [[PIn_1]], %conv4.i.i
106+
107+
; CHECK: [[Cast_PIn_2:%[^ ]+]] = bitcast i16* %PIn.2 to i32*
108+
; CHECK: [[PIn_23:%[^ ]+]] = load i32, i32* [[Cast_PIn_2]], align 2
109+
; CHECK: [[PIn_23_shl:%[^ ]+]] = shl i32 [[PIn_23]], 16
110+
; CHECK: [[PIn_2:%[^ ]+]] = ashr i32 [[PIn_23_shl]], 16
111+
; CHECK: [[PIn_3:%[^ ]+]] = ashr i32 [[PIn_23]], 16
112+
113+
; CHECK: mul nsw i32 [[PIn_2]], %conv4.i.i
114+
; CHECK: mul nsw i32 [[PIn_3]], %conv4.i.i
115+
116+
for.body:
117+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
118+
%count = phi i32 [ %N, %entry ], [ %count.next, %for.body ]
119+
%PIn.0 = getelementptr inbounds i16, i16* %In, i32 %iv
120+
%In.0 = load i16, i16* %PIn.0, align 2
121+
%conv.us.i144.i = sext i16 %In.0 to i32
122+
%mul5.us.i.i = mul nsw i32 %conv.us.i144.i, %conv4.i.i
123+
%Out.0 = getelementptr inbounds i32, i32* %Out, i32 %iv
124+
store i32 %mul5.us.i.i, i32* %Out.0, align 4
125+
%iv.1 = or i32 %iv, 1
126+
%PIn.1 = getelementptr inbounds i16, i16* %In, i32 %iv.1
127+
%In.1 = load i16, i16* %PIn.1, align 2
128+
%conv.us.i144.1.i = sext i16 %In.1 to i32
129+
%mul5.us.i.1.i = mul nsw i32 %conv.us.i144.1.i, %conv4.i.i
130+
%Out.1 = getelementptr inbounds i32, i32* %Out, i32 %iv.1
131+
store i32 %mul5.us.i.1.i, i32* %Out.1, align 4
132+
%iv.2 = or i32 %iv, 2
133+
%PIn.2 = getelementptr inbounds i16, i16* %In, i32 %iv.2
134+
%In.3 = load i16, i16* %PIn.2, align 2
135+
%conv.us.i144.2.i = sext i16 %In.3 to i32
136+
%mul5.us.i.2.i = mul nsw i32 %conv.us.i144.2.i, %conv4.i.i
137+
%Out.2 = getelementptr inbounds i32, i32* %Out, i32 %iv.2
138+
store i32 %mul5.us.i.2.i, i32* %Out.2, align 4
139+
%iv.3 = or i32 %iv, 3
140+
%PIn.3 = getelementptr inbounds i16, i16* %In, i32 %iv.3
141+
%In.4 = load i16, i16* %PIn.3, align 2
142+
%conv.us.i144.3.i = sext i16 %In.4 to i32
143+
%mul5.us.i.3.i = mul nsw i32 %conv.us.i144.3.i, %conv4.i.i
144+
%Out.3 = getelementptr inbounds i32, i32* %Out, i32 %iv.3
145+
store i32 %mul5.us.i.3.i, i32* %Out.3, align 4
146+
%iv.next = add i32 %iv, 4
147+
%count.next = add i32 %count, -4
148+
%niter375.ncmp.3.i = icmp eq i32 %count.next, 0
149+
br i1 %niter375.ncmp.3.i, label %exit, label %for.body
150+
151+
exit:
152+
ret void
153+
}
154+
155+
; CHECK-LABEL: topbottom_mul_64
156+
define void @topbottom_mul_64(i32 %N, i64* noalias nocapture readnone %Out, i16* nocapture readonly %In1, i16* nocapture readonly %In2) {
157+
entry:
158+
br label %for.body
159+
160+
; CHECK: for.body:
161+
; CHECK: [[Cast_PIn1_0:%[^ ]+]] = bitcast i16* %PIn1.0 to i32*
162+
; CHECK: [[PIn1_01:%[^ ]+]] = load i32, i32* [[Cast_PIn1_0]], align 2
163+
; CHECK: [[PIn1_01_shl:%[^ ]+]] = shl i32 [[PIn1_01]], 16
164+
; CHECK: [[PIn1_0:%[^ ]+]] = ashr i32 [[PIn1_01_shl]], 16
165+
; CHECK: [[PIn1_1:%[^ ]+]] = ashr i32 [[PIn1_01]], 16
166+
167+
; CHECK: [[Cast_PIn2_0:%[^ ]+]] = bitcast i16* %PIn2.0 to i32*
168+
; CHECK: [[PIn2_01:%[^ ]+]] = load i32, i32* [[Cast_PIn2_0]], align 2
169+
; CHECK: [[PIn2_01_shl:%[^ ]+]] = shl i32 [[PIn2_01]], 16
170+
; CHECK: [[PIn2_0:%[^ ]+]] = ashr i32 [[PIn2_01_shl]], 16
171+
; CHECK: [[PIn2_1:%[^ ]+]] = ashr i32 [[PIn2_01]], 16
172+
173+
; CHECK: [[Mul0:%[^ ]+]] = mul nsw i32 [[PIn1_0]], [[PIn2_0]]
174+
; CHECK: [[SMul0:%[^ ]+]] = sext i32 [[Mul0]] to i64
175+
; CHECK: [[Mul1:%[^ ]+]] = mul nsw i32 [[PIn1_1]], [[PIn2_1]]
176+
; CHECK: [[SMul1:%[^ ]+]] = sext i32 [[Mul1]] to i64
177+
; CHECK: add i64 [[SMul0]], [[SMul1]]
178+
179+
; CHECK: [[Cast_PIn1_2:%[^ ]+]] = bitcast i16* %PIn1.2 to i32*
180+
; CHECK: [[PIn1_23:%[^ ]+]] = load i32, i32* [[Cast_PIn1_2]], align 2
181+
; CHECK: [[PIn1_23_shl:%[^ ]+]] = shl i32 [[PIn1_23]], 16
182+
; CHECK: [[PIn1_2:%[^ ]+]] = ashr i32 [[PIn1_23_shl]], 16
183+
; CHECK: [[PIn1_3:%[^ ]+]] = ashr i32 [[PIn1_23]], 16
184+
185+
; CHECK: [[Cast_PIn2_2:%[^ ]+]] = bitcast i16* %PIn2.2 to i32*
186+
; CHECK: [[PIn2_23:%[^ ]+]] = load i32, i32* [[Cast_PIn2_2]], align 2
187+
; CHECK: [[PIn2_23_shl:%[^ ]+]] = shl i32 [[PIn2_23]], 16
188+
; CHECK: [[PIn2_2:%[^ ]+]] = ashr i32 [[PIn2_23_shl]], 16
189+
; CHECK: [[PIn2_3:%[^ ]+]] = ashr i32 [[PIn2_23]], 16
190+
191+
; CHECK: [[Mul2:%[^ ]+]] = mul nsw i32 [[PIn1_2]], [[PIn2_2]]
192+
; CHECK: [[SMul2:%[^ ]+]] = sext i32 [[Mul2]] to i64
193+
; CHECK: [[Mul3:%[^ ]+]] = mul nsw i32 [[PIn1_3]], [[PIn2_3]]
194+
; CHECK: [[SMul3:%[^ ]+]] = sext i32 [[Mul3]] to i64
195+
; CHECK: add i64 [[SMul2]], [[SMul3]]
196+
197+
for.body:
198+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
199+
%iv.out = phi i32 [ 0, %entry] , [ %iv.out.next, %for.body ]
200+
%count = phi i32 [ %N, %entry ], [ %count.next, %for.body ]
201+
%PIn1.0 = getelementptr inbounds i16, i16* %In1, i32 %iv
202+
%In1.0 = load i16, i16* %PIn1.0, align 2
203+
%SIn1.0 = sext i16 %In1.0 to i32
204+
%PIn2.0 = getelementptr inbounds i16, i16* %In2, i32 %iv
205+
%In2.0 = load i16, i16* %PIn2.0, align 2
206+
%SIn2.0 = sext i16 %In2.0 to i32
207+
%mul5.us.i.i = mul nsw i32 %SIn1.0, %SIn2.0
208+
%sext.0 = sext i32 %mul5.us.i.i to i64
209+
%iv.1 = or i32 %iv, 1
210+
%PIn1.1 = getelementptr inbounds i16, i16* %In1, i32 %iv.1
211+
%In1.1 = load i16, i16* %PIn1.1, align 2
212+
%SIn1.1 = sext i16 %In1.1 to i32
213+
%PIn2.1 = getelementptr inbounds i16, i16* %In2, i32 %iv.1
214+
%In2.1 = load i16, i16* %PIn2.1, align 2
215+
%SIn2.1 = sext i16 %In2.1 to i32
216+
%mul5.us.i.1.i = mul nsw i32 %SIn1.1, %SIn2.1
217+
%sext.1 = sext i32 %mul5.us.i.1.i to i64
218+
%mac.0 = add i64 %sext.0, %sext.1
219+
%Out.0 = getelementptr inbounds i64, i64* %Out, i32 %iv.out
220+
store i64 %mac.0, i64* %Out.0, align 4
221+
%iv.2 = or i32 %iv, 2
222+
%PIn1.2 = getelementptr inbounds i16, i16* %In1, i32 %iv.2
223+
%In1.2 = load i16, i16* %PIn1.2, align 2
224+
%SIn1.2 = sext i16 %In1.2 to i32
225+
%PIn2.2 = getelementptr inbounds i16, i16* %In2, i32 %iv.2
226+
%In2.2 = load i16, i16* %PIn2.2, align 2
227+
%SIn2.2 = sext i16 %In2.2 to i32
228+
%mul5.us.i.2.i = mul nsw i32 %SIn1.2, %SIn2.2
229+
%sext.2 = sext i32 %mul5.us.i.2.i to i64
230+
%iv.3 = or i32 %iv, 3
231+
%PIn1.3 = getelementptr inbounds i16, i16* %In1, i32 %iv.3
232+
%In1.3 = load i16, i16* %PIn1.3, align 2
233+
%SIn1.3 = sext i16 %In1.3 to i32
234+
%PIn2.3 = getelementptr inbounds i16, i16* %In2, i32 %iv.3
235+
%In2.3 = load i16, i16* %PIn2.3, align 2
236+
%SIn2.3 = sext i16 %In2.3 to i32
237+
%mul5.us.i.3.i = mul nsw i32 %SIn1.3, %SIn2.3
238+
%sext.3 = sext i32 %mul5.us.i.3.i to i64
239+
%mac.1 = add i64 %sext.2, %sext.3
240+
%iv.out.1 = or i32 %iv.out, 1
241+
%Out.1 = getelementptr inbounds i64, i64* %Out, i32 %iv.out.1
242+
store i64 %mac.1, i64* %Out.1, align 4
243+
%iv.next = add i32 %iv, 4
244+
%iv.out.next = add i32 %iv.out, 2
245+
%count.next = add i32 %count, -4
246+
%niter375.ncmp.3.i = icmp eq i32 %count.next, 0
247+
br i1 %niter375.ncmp.3.i, label %exit, label %for.body
248+
249+
exit:
250+
ret void
251+
}

0 commit comments

Comments
 (0)
Please sign in to comment.