Skip to content

Commit 20982f0

Browse files
committed Sep 26, 2018
[PowerPC] optimize conditional branch on CRSET/CRUNSET
This patch adds a check to optimize conditional branches (BC and BCn) based on a constant set by CRSET or CRUNSET. Other optimizers, such as block placement, may generate such code, and hence I do this at the very end of optimization, in the pre-emit peephole pass. A conditional branch based on a constant is either eliminated or converted into an unconditional branch. Also, the CRSET/CRUNSET is eliminated if the condition code register is not used by any instruction other than the branch to be optimized. Differential Revision: https://reviews.llvm.org/D52345 llvm-svn: 343100
1 parent 20b5abe commit 20982f0

File tree

3 files changed

+335
-0
lines changed

3 files changed

+335
-0
lines changed
 

‎llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp

+71
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/DenseMap.h"
1919
#include "llvm/ADT/Statistic.h"
2020
#include "llvm/CodeGen/LivePhysRegs.h"
21+
#include "llvm/CodeGen/MachineBasicBlock.h"
2122
#include "llvm/CodeGen/MachineFunctionPass.h"
2223
#include "llvm/CodeGen/MachineInstrBuilder.h"
2324
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -60,6 +61,7 @@ namespace {
6061
return false;
6162
bool Changed = false;
6263
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
64+
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
6365
SmallVector<MachineInstr *, 4> InstrsToErase;
6466
for (MachineBasicBlock &MBB : MF) {
6567
for (MachineInstr &MI : MBB) {
@@ -74,6 +76,75 @@ namespace {
7476
}
7577
}
7678
}
79+
80+
// Eliminate conditional branch based on a constant CR bit set by
81+
// CRSET or CRUNSET. We eliminate the conditional branch or
82+
// convert it into an unconditional branch. Also, if the CR bit
83+
// is not used by other instructions, we eliminate CRSET as well.
84+
auto I = MBB.getFirstInstrTerminator();
85+
if (I == MBB.instr_end())
86+
continue;
87+
MachineInstr *Br = &*I;
88+
if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
89+
continue;
90+
MachineInstr *CRSetMI = nullptr;
91+
unsigned CRBit = Br->getOperand(0).getReg();
92+
unsigned CRReg = getCRFromCRBit(CRBit);
93+
bool SeenUse = false;
94+
MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
95+
for (It++; It != Er; It++) {
96+
if (It->modifiesRegister(CRBit, TRI)) {
97+
if ((It->getOpcode() == PPC::CRUNSET ||
98+
It->getOpcode() == PPC::CRSET) &&
99+
It->getOperand(0).getReg() == CRBit)
100+
CRSetMI = &*It;
101+
break;
102+
}
103+
if (It->readsRegister(CRBit, TRI))
104+
SeenUse = true;
105+
}
106+
if (!CRSetMI) continue;
107+
108+
unsigned CRSetOp = CRSetMI->getOpcode();
109+
if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
110+
(Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
111+
// Remove this branch since it cannot be taken.
112+
InstrsToErase.push_back(Br);
113+
MBB.removeSuccessor(Br->getOperand(1).getMBB());
114+
}
115+
else {
116+
// This conditional branch is always taken. So, remove all branches
117+
// and insert an unconditional branch to the destination of this.
118+
MachineBasicBlock::iterator It = Br, Er = MBB.end();
119+
for (; It != Er && !SeenUse; It++) {
120+
if (It->isDebugInstr()) continue;
121+
assert(It->isTerminator() && "Non-terminator after a terminator");
122+
InstrsToErase.push_back(&*It);
123+
}
124+
if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
125+
ArrayRef<MachineOperand> NoCond;
126+
TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
127+
NoCond, Br->getDebugLoc());
128+
}
129+
for (auto &Succ : MBB.successors())
130+
if (Succ != Br->getOperand(1).getMBB()) {
131+
MBB.removeSuccessor(Succ);
132+
break;
133+
}
134+
}
135+
136+
// If the CRBit is not used by another instruction, we can eliminate
137+
// CRSET/CRUNSET instruction.
138+
if (!SeenUse) {
139+
// We need to check use of the CRBit in successors.
140+
for (auto &SuccMBB : MBB.successors())
141+
if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
142+
SeenUse = true;
143+
break;
144+
}
145+
if (!SeenUse)
146+
InstrsToErase.push_back(CRSetMI);
147+
}
77148
}
78149
for (MachineInstr *MI : InstrsToErase) {
79150
LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# RUN: llc -verify-machineinstrs -start-before=ppc-pre-emit-peephole %s -o - | FileCheck %s
2+
--- |
3+
target datalayout = "e-m:e-i64:64-n32:64"
4+
target triple = "powerpc64le-unknown-linux-gnu"
5+
6+
declare signext i32 @callee(i32 signext) local_unnamed_addr #1
7+
8+
define signext i32 @func(i32 signext %v) local_unnamed_addr #0 {
9+
entry:
10+
%call.i = tail call signext i32 @callee(i32 signext %v)
11+
%tobool.i = icmp eq i32 %call.i, 0
12+
br i1 %tobool.i, label %if.else.i, label %if.then.i
13+
14+
if.then.i: ; preds = %entry
15+
%call2.i = tail call signext i32 @callee(i32 signext %call.i)
16+
br label %_Z6calleei.exit
17+
18+
if.else.i: ; preds = %entry
19+
%phitmp = icmp sgt i32 %v, -1
20+
br label %_Z6calleei.exit
21+
22+
_Z6calleei.exit: ; preds = %if.else.i, %if.then.i
23+
%call2.i.sink = phi i32 [ %call2.i, %if.then.i ], [ %v, %if.else.i ]
24+
%.sink = phi i1 [ false, %if.then.i ], [ %phitmp, %if.else.i ]
25+
br i1 %.sink, label %if.end, label %if.then
26+
27+
if.then: ; preds = %_Z6calleei.exit
28+
%call1 = tail call signext i32 @callee(i32 signext 0)
29+
br label %if.end
30+
31+
if.end: ; preds = %if.then, %_Z6calleei.exit
32+
ret i32 %call2.i.sink
33+
}
34+
35+
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
36+
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
37+
38+
...
39+
---
40+
name: func
41+
alignment: 4
42+
exposesReturnsTwice: false
43+
legalized: false
44+
regBankSelected: false
45+
selected: false
46+
failedISel: false
47+
tracksRegLiveness: true
48+
registers:
49+
liveins:
50+
- { reg: '$x3', virtual-reg: '' }
51+
frameInfo:
52+
isFrameAddressTaken: false
53+
isReturnAddressTaken: false
54+
hasStackMap: false
55+
hasPatchPoint: false
56+
stackSize: 48
57+
offsetAdjustment: 0
58+
maxAlignment: 0
59+
adjustsStack: true
60+
hasCalls: true
61+
stackProtector: ''
62+
maxCallFrameSize: 32
63+
hasOpaqueSPAdjustment: false
64+
hasVAStart: false
65+
hasMustTailInVarArgFunc: false
66+
localFrameSize: 0
67+
savePoint: ''
68+
restorePoint: ''
69+
fixedStack:
70+
- { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: 0,
71+
callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
72+
debug-info-expression: '', debug-info-location: '' }
73+
stack:
74+
constants:
75+
76+
body: |
77+
bb.0.entry:
78+
successors: %bb.2(0x30000000), %bb.1(0x50000000)
79+
liveins: $x3, $x30
80+
81+
; bc and crxor (CRUNSET) should be removed.
82+
; CHECK-LABEL: func
83+
; CHECK: # %bb.1
84+
; CHECK-NOT: crxor
85+
; CHECK-NOT: bc
86+
; CHECK: .LBB0_2
87+
88+
$x0 = MFLR8 implicit $lr8
89+
STD killed $x0, 16, $x1
90+
$x1 = STDU $x1, -48, $x1
91+
STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16)
92+
$x30 = OR8 $x3, $x3
93+
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
94+
renamable $cr0 = CMPLWI renamable $r3, 0
95+
BCC 76, killed renamable $cr0, %bb.2
96+
97+
bb.1.if.then.i:
98+
successors: %bb.5(0x40000000), %bb.4(0x40000000)
99+
liveins: $x3
100+
101+
renamable $x3 = EXTSW_32_64 killed renamable $r3, implicit $x3
102+
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
103+
renamable $cr0gt = CRUNSET implicit-def $cr0
104+
$x30 = OR8 killed $x3, $x3
105+
BC killed renamable $cr0gt, %bb.5
106+
107+
bb.4.if.then:
108+
successors: %bb.5(0x80000000)
109+
liveins: $x30
110+
111+
$x3 = LI8 0
112+
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3
113+
114+
bb.5.if.end:
115+
liveins: $x30
116+
117+
renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30
118+
$x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16)
119+
$x1 = ADDI8 $x1, 48
120+
$x0 = LD 16, $x1
121+
MTLR8 killed $x0, implicit-def $lr8
122+
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
123+
124+
bb.2.if.else.i:
125+
successors: %bb.5(0x40000000), %bb.4(0x40000000)
126+
liveins: $x30
127+
128+
renamable $cr0 = CMPWI renamable $r30, -1
129+
BCn killed renamable $cr0gt, %bb.4
130+
B %bb.5
131+
132+
...
+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# RUN: llc -verify-machineinstrs -start-before=ppc-pre-emit-peephole %s -o - | FileCheck %s
2+
--- |
3+
target datalayout = "e-m:e-i64:64-n32:64"
4+
target triple = "powerpc64le-unknown-linux-gnu"
5+
6+
declare signext i32 @callee(i32 signext) local_unnamed_addr #1
7+
8+
define signext i32 @func(i32 signext %v) local_unnamed_addr #0 {
9+
entry:
10+
%call.i = tail call signext i32 @callee(i32 signext %v)
11+
%tobool.i = icmp eq i32 %call.i, 0
12+
br i1 %tobool.i, label %if.else.i, label %if.then.i
13+
14+
if.then.i: ; preds = %entry
15+
%call2.i = tail call signext i32 @callee(i32 signext %call.i)
16+
br label %_Z6calleei.exit
17+
18+
if.else.i: ; preds = %entry
19+
%phitmp = icmp sgt i32 %v, -1
20+
br label %_Z6calleei.exit
21+
22+
_Z6calleei.exit: ; preds = %if.else.i, %if.then.i
23+
%call2.i.sink = phi i32 [ %call2.i, %if.then.i ], [ %v, %if.else.i ]
24+
%.sink = phi i1 [ false, %if.then.i ], [ %phitmp, %if.else.i ]
25+
br i1 %.sink, label %if.end, label %if.then
26+
27+
if.then: ; preds = %_Z6calleei.exit
28+
%call1 = tail call signext i32 @callee(i32 signext 0)
29+
br label %if.end
30+
31+
if.end: ; preds = %if.then, %_Z6calleei.exit
32+
ret i32 %call2.i.sink
33+
}
34+
35+
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
36+
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
37+
38+
...
39+
---
40+
name: func
41+
alignment: 4
42+
exposesReturnsTwice: false
43+
legalized: false
44+
regBankSelected: false
45+
selected: false
46+
failedISel: false
47+
tracksRegLiveness: true
48+
registers:
49+
liveins:
50+
- { reg: '$x3', virtual-reg: '' }
51+
frameInfo:
52+
isFrameAddressTaken: false
53+
isReturnAddressTaken: false
54+
hasStackMap: false
55+
hasPatchPoint: false
56+
stackSize: 48
57+
offsetAdjustment: 0
58+
maxAlignment: 0
59+
adjustsStack: true
60+
hasCalls: true
61+
stackProtector: ''
62+
maxCallFrameSize: 32
63+
hasOpaqueSPAdjustment: false
64+
hasVAStart: false
65+
hasMustTailInVarArgFunc: false
66+
localFrameSize: 0
67+
savePoint: ''
68+
restorePoint: ''
69+
fixedStack:
70+
- { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: 0,
71+
callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
72+
debug-info-expression: '', debug-info-location: '' }
73+
stack:
74+
constants:
75+
76+
body: |
77+
bb.0.entry:
78+
successors: %bb.2(0x30000000), %bb.1(0x50000000)
79+
liveins: $x3, $x30
80+
81+
; bc should be converted into b, but creqv (CRSET) should not be removed since the CR bit is live-in to a successor.
82+
; CHECK-LABEL: func
83+
; CHECK: # %bb.1
84+
; CHECK: creqv
85+
; CHECK-NOT: bc
86+
; CHECK: .LBB0_2
87+
88+
$x0 = MFLR8 implicit $lr8
89+
STD killed $x0, 16, $x1
90+
$x1 = STDU $x1, -48, $x1
91+
STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16)
92+
$x30 = OR8 $x3, $x3
93+
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
94+
renamable $cr0 = CMPLWI renamable $r3, 0
95+
BCC 76, killed renamable $cr0, %bb.2
96+
97+
bb.1.if.then.i:
98+
successors: %bb.5(0x40000000), %bb.4(0x40000000)
99+
liveins: $x3
100+
101+
renamable $x3 = EXTSW_32_64 killed renamable $r3, implicit $x3
102+
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
103+
renamable $cr0gt = CRSET implicit-def $cr0
104+
$x30 = OR8 killed $x3, $x3
105+
BC killed renamable $cr0gt, %bb.5
106+
107+
bb.4.if.then:
108+
successors: %bb.5(0x80000000)
109+
liveins: $x30
110+
111+
$x3 = LI8 0
112+
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3
113+
114+
bb.5.if.end:
115+
liveins: $x30, $cr0gt
116+
117+
renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30
118+
$x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16)
119+
$x1 = ADDI8 $x1, 48
120+
$x0 = LD 16, $x1
121+
MTLR8 killed $x0, implicit-def $lr8
122+
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
123+
124+
bb.2.if.else.i:
125+
successors: %bb.5(0x40000000), %bb.4(0x40000000)
126+
liveins: $x30
127+
128+
renamable $cr0 = CMPWI renamable $r30, -1
129+
BCn killed renamable $cr0gt, %bb.4
130+
B %bb.5
131+
132+
...

0 commit comments

Comments
 (0)