Skip to content

Commit acb628b

Browse files
committed Feb 22, 2019
[ARM] Add some missing thumb1 opcodes to enable peephole optimisation of CMPs
This adds a number of missing Thumb1 opcodes so that the peephole optimiser can remove redundant CMP instructions. This is a reapplication of the change: the first attempt broke non-Thumb1 code because the t2ADDri instruction can be used with frame indices, whereas in Thumb1 we use tADDframe for that purpose. Differential Revision: https://reviews.llvm.org/D57833 llvm-svn: 354667
1 parent 0cc32dd commit acb628b

File tree

4 files changed

+324
-13
lines changed

4 files changed

+324
-13
lines changed
 

‎llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

+54-11
Original file line number | Diff line number | Diff line change
@@ -2542,6 +2542,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
25422542
return true;
25432543
case ARM::CMPrr:
25442544
case ARM::t2CMPrr:
2545+
case ARM::tCMPr:
25452546
SrcReg = MI.getOperand(0).getReg();
25462547
SrcReg2 = MI.getOperand(1).getReg();
25472548
CmpMask = ~0;
@@ -2618,28 +2619,62 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
26182619
/// This function can be extended later on.
26192620
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
26202621
unsigned SrcReg, unsigned SrcReg2,
2621-
int ImmValue, const MachineInstr *OI) {
2622+
int ImmValue, const MachineInstr *OI,
2623+
bool &IsThumb1) {
26222624
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
26232625
(OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
26242626
((OI->getOperand(1).getReg() == SrcReg &&
26252627
OI->getOperand(2).getReg() == SrcReg2) ||
26262628
(OI->getOperand(1).getReg() == SrcReg2 &&
2627-
OI->getOperand(2).getReg() == SrcReg)))
2629+
OI->getOperand(2).getReg() == SrcReg))) {
2630+
IsThumb1 = false;
2631+
return true;
2632+
}
2633+
2634+
if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2635+
((OI->getOperand(2).getReg() == SrcReg &&
2636+
OI->getOperand(3).getReg() == SrcReg2) ||
2637+
(OI->getOperand(2).getReg() == SrcReg2 &&
2638+
OI->getOperand(3).getReg() == SrcReg))) {
2639+
IsThumb1 = true;
26282640
return true;
2641+
}
26292642

26302643
if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
26312644
(OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
26322645
OI->getOperand(1).getReg() == SrcReg &&
2633-
OI->getOperand(2).getImm() == ImmValue)
2646+
OI->getOperand(2).getImm() == ImmValue) {
2647+
IsThumb1 = false;
26342648
return true;
2649+
}
2650+
2651+
if (CmpI->getOpcode() == ARM::tCMPi8 &&
2652+
(OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2653+
OI->getOperand(2).getReg() == SrcReg &&
2654+
OI->getOperand(3).getImm() == ImmValue) {
2655+
IsThumb1 = true;
2656+
return true;
2657+
}
26352658

26362659
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
26372660
(OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
26382661
OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
26392662
OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
26402663
OI->getOperand(0).getReg() == SrcReg &&
2641-
OI->getOperand(1).getReg() == SrcReg2)
2664+
OI->getOperand(1).getReg() == SrcReg2) {
2665+
IsThumb1 = false;
2666+
return true;
2667+
}
2668+
2669+
if (CmpI->getOpcode() == ARM::tCMPr &&
2670+
(OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2671+
OI->getOpcode() == ARM::tADDrr) &&
2672+
OI->getOperand(0).getReg() == SrcReg &&
2673+
OI->getOperand(2).getReg() == SrcReg2) {
2674+
IsThumb1 = true;
26422675
return true;
2676+
}
2677+
26432678
return false;
26442679
}
26452680

@@ -2756,7 +2791,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
27562791
// For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
27572792
// Thus we cannot return here.
27582793
if (CmpInstr.getOpcode() == ARM::CMPri ||
2759-
CmpInstr.getOpcode() == ARM::t2CMPri)
2794+
CmpInstr.getOpcode() == ARM::t2CMPri ||
2795+
CmpInstr.getOpcode() == ARM::tCMPi8)
27602796
MI = nullptr;
27612797
else
27622798
return false;
@@ -2800,11 +2836,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
28002836
// Check that CPSR isn't set between the comparison instruction and the one we
28012837
// want to change. At the same time, search for SubAdd.
28022838
const TargetRegisterInfo *TRI = &getRegisterInfo();
2839+
bool SubAddIsThumb1 = false;
28032840
do {
28042841
const MachineInstr &Instr = *--I;
28052842

28062843
// Check whether CmpInstr can be made redundant by the current instruction.
2807-
if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
2844+
if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
2845+
SubAddIsThumb1)) {
28082846
SubAdd = &*I;
28092847
break;
28102848
}
@@ -2828,7 +2866,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
28282866
// If we found a SubAdd, use it as it will be closer to the CMP
28292867
if (SubAdd) {
28302868
MI = SubAdd;
2831-
IsThumb1 = false;
2869+
IsThumb1 = SubAddIsThumb1;
28322870
}
28332871

28342872
// We can't use a predicated instruction - it doesn't always write the flags.
@@ -2897,9 +2935,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
28972935
// operands will be modified.
28982936
unsigned Opc = SubAdd->getOpcode();
28992937
bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
2900-
Opc == ARM::SUBri || Opc == ARM::t2SUBri;
2901-
if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
2902-
SubAdd->getOperand(2).getReg() == SrcReg)) {
2938+
Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
2939+
Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
2940+
Opc == ARM::tSUBi8;
2941+
unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
2942+
if (!IsSub ||
2943+
(SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
2944+
SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
29032945
// VSel doesn't support condition code update.
29042946
if (IsInstrVSel)
29052947
return false;
@@ -2977,9 +3019,10 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
29773019
++Next;
29783020
unsigned SrcReg, SrcReg2;
29793021
int CmpMask, CmpValue;
3022+
bool IsThumb1;
29803023
if (Next != MI.getParent()->end() &&
29813024
analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
2982-
isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
3025+
isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
29833026
return false;
29843027
return true;
29853028
}

‎llvm/test/CodeGen/ARM/intrinsics-overflow.ll

+1-2
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,7 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
3838
; ARM: movvc r[[R0]], #0
3939
; ARM: mov pc, lr
4040

41-
; THUMBV6: adds r1, r0, r1
42-
; THUMBV6: cmp r1, r0
41+
; THUMBV6: adds r0, r0, r1
4342
; THUMBV6: bvc .LBB1_2
4443

4544
; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+226
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,226 @@
1+
# RUN: llc -mtriple thumbv8m.base-none-eabi -run-pass=peephole-opt -verify-machineinstrs -o - %s | FileCheck %s
2+
--- |
3+
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
4+
target triple = "thumbv8m.base-none-none-eabi"
5+
6+
define i32 @test_subrr(i32 %a, i32 %b) { ret i32 %a }
7+
define i32 @test_subrr_c(i32 %a, i32 %b) { ret i32 %a }
8+
define i32 @test_subri3(i32 %a) { ret i32 %a }
9+
define i32 @test_subri8(i32 %a) { ret i32 %a }
10+
define i32 @test_addrr(i32 %a) { ret i32 %a }
11+
define i32 @test_addri3(i32 %a) { ret i32 %a }
12+
define i32 @test_addri8(i32 %a) { ret i32 %a }
13+
14+
...
15+
---
16+
name: test_subrr
17+
liveins:
18+
- { reg: '$r0', virtual-reg: '%1' }
19+
- { reg: '$r1', virtual-reg: '%2' }
20+
body: |
21+
bb.0:
22+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
23+
liveins: $r0, $r1
24+
25+
%2:tgpr = COPY $r1
26+
%1:tgpr = COPY $r0
27+
%0:tgpr, $cpsr = tSUBrr %2, %1, 14, $noreg
28+
tCMPr %1, %2, 14, $noreg, implicit-def $cpsr
29+
tBcc %bb.2, 3, $cpsr
30+
tB %bb.1, 14, $noreg
31+
32+
bb.1:
33+
$r0 = COPY %0
34+
tBX_RET 14, $noreg, implicit $r0
35+
36+
bb.2:
37+
%3:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
38+
$r0 = COPY %3
39+
tBX_RET 14, $noreg, implicit $r0
40+
41+
# CHECK-LABEL: name: test_subrr
42+
# CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r1
43+
# CHECK-NEXT: [[COPY0:%[0-9]+]]:tgpr = COPY $r0
44+
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tSUBrr [[COPY1]], [[COPY0]], 14, $noreg
45+
# CHECK-NEXT: tBcc %bb.2, 8, $cpsr
46+
...
47+
---
48+
name: test_subrr_c
49+
liveins:
50+
- { reg: '$r0', virtual-reg: '%1' }
51+
- { reg: '$r1', virtual-reg: '%2' }
52+
body: |
53+
bb.0:
54+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
55+
liveins: $r0, $r1
56+
57+
%2:tgpr = COPY $r1
58+
%1:tgpr = COPY $r0
59+
%0:tgpr, $cpsr = tSUBrr %1, %2, 14, $noreg
60+
tCMPr %1, %2, 14, $noreg, implicit-def $cpsr
61+
tBcc %bb.2, 3, $cpsr
62+
tB %bb.1, 14, $noreg
63+
64+
bb.1:
65+
$r0 = COPY %0
66+
tBX_RET 14, $noreg, implicit $r0
67+
68+
bb.2:
69+
%3:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
70+
$r0 = COPY %3
71+
tBX_RET 14, $noreg, implicit $r0
72+
73+
# CHECK-LABEL: name: test_subrr_c
74+
# CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r1
75+
# CHECK-NEXT: [[COPY0:%[0-9]+]]:tgpr = COPY $r0
76+
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tSUBrr [[COPY0]], [[COPY1]], 14, $noreg
77+
# CHECK-NEXT: tBcc %bb.2, 3, $cpsr
78+
...
79+
---
80+
name: test_subri3
81+
liveins:
82+
- { reg: '$r0', virtual-reg: '%1' }
83+
body: |
84+
bb.0:
85+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
86+
liveins: $r0
87+
88+
%1:tgpr = COPY $r0
89+
%0:tgpr, $cpsr = tSUBi3 %1, 1, 14, $noreg
90+
tCMPi8 %1, 1, 14, $noreg, implicit-def $cpsr
91+
tBcc %bb.2, 3, $cpsr
92+
tB %bb.1, 14, $noreg
93+
94+
bb.1:
95+
$r0 = COPY %0
96+
tBX_RET 14, $noreg, implicit $r0
97+
98+
bb.2:
99+
%2:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
100+
$r0 = COPY %2
101+
tBX_RET 14, $noreg, implicit $r0
102+
103+
# CHECK-LABEL: name: test_subri3
104+
# CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
105+
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tSUBi3 [[COPY]], 1, 14, $noreg
106+
# CHECK-NEXT: tBcc %bb.2, 3, $cpsr
107+
...
108+
---
109+
name: test_subri8
110+
liveins:
111+
- { reg: '$r0', virtual-reg: '%1' }
112+
body: |
113+
bb.0:
114+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
115+
liveins: $r0
116+
117+
%1:tgpr = COPY $r0
118+
%0:tgpr, $cpsr = tSUBi8 %1, 1, 14, $noreg
119+
tCMPi8 %1, 1, 14, $noreg, implicit-def $cpsr
120+
tBcc %bb.2, 3, $cpsr
121+
tB %bb.1, 14, $noreg
122+
123+
bb.1:
124+
$r0 = COPY %0
125+
tBX_RET 14, $noreg, implicit $r0
126+
127+
bb.2:
128+
%2:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
129+
$r0 = COPY %2
130+
tBX_RET 14, $noreg, implicit $r0
131+
132+
# CHECK-LABEL: name: test_subri8
133+
# CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
134+
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tSUBi8 [[COPY]], 1, 14, $noreg
135+
# CHECK-NEXT: tBcc %bb.2, 3, $cpsr
136+
...
137+
---
138+
name: test_addrr
139+
liveins:
140+
- { reg: '$r0', virtual-reg: '%1' }
141+
- { reg: '$r1', virtual-reg: '%2' }
142+
body: |
143+
bb.0:
144+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
145+
liveins: $r0, $r1
146+
147+
%2:tgpr = COPY $r1
148+
%1:tgpr = COPY $r0
149+
%0:tgpr, $cpsr = tADDrr %2, %1, 14, $noreg
150+
tCMPr %0, %2, 14, $noreg, implicit-def $cpsr
151+
tBcc %bb.2, 3, $cpsr
152+
tB %bb.1, 14, $noreg
153+
154+
bb.1:
155+
$r0 = COPY %0
156+
tBX_RET 14, $noreg, implicit $r0
157+
158+
bb.2:
159+
%3:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
160+
$r0 = COPY %3
161+
tBX_RET 14, $noreg, implicit $r0
162+
163+
# CHECK-LABEL: name: test_addrr
164+
# CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r1
165+
# CHECK-NEXT: [[COPY0:%[0-9]+]]:tgpr = COPY $r0
166+
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tADDrr [[COPY1]], [[COPY0]], 14, $noreg
167+
# CHECK-NEXT: tBcc %bb.2, 2, $cpsr
168+
...
169+
---
170+
name: test_addri3
171+
liveins:
172+
- { reg: '$r0', virtual-reg: '%1' }
173+
body: |
174+
bb.0:
175+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
176+
liveins: $r0
177+
178+
%0:tgpr = COPY $r0
179+
%1:tgpr, $cpsr = tADDi3 %0, 1, 14, $noreg
180+
tCMPr %1, %0, 14, $noreg, implicit-def $cpsr
181+
tBcc %bb.2, 3, $cpsr
182+
tB %bb.1, 14, $noreg
183+
184+
bb.1:
185+
$r0 = COPY %0
186+
tBX_RET 14, $noreg, implicit $r0
187+
188+
bb.2:
189+
%2:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
190+
$r0 = COPY %2
191+
tBX_RET 14, $noreg, implicit $r0
192+
193+
# CHECK-LABEL: name: test_addri3
194+
# CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
195+
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tADDi3 [[COPY]], 1, 14, $noreg
196+
# CHECK-NEXT: tBcc %bb.2, 2, $cpsr
197+
...
198+
---
199+
name: test_addri8
200+
liveins:
201+
- { reg: '$r0', virtual-reg: '%1' }
202+
body: |
203+
bb.0:
204+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
205+
liveins: $r0
206+
207+
%0:tgpr = COPY $r0
208+
%1:tgpr, $cpsr = tADDi8 %0, 10, 14, $noreg
209+
tCMPr %1, %0, 14, $noreg, implicit-def $cpsr
210+
tBcc %bb.2, 3, $cpsr
211+
tB %bb.1, 14, $noreg
212+
213+
bb.1:
214+
$r0 = COPY %0
215+
tBX_RET 14, $noreg, implicit $r0
216+
217+
bb.2:
218+
%2:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
219+
$r0 = COPY %2
220+
tBX_RET 14, $noreg, implicit $r0
221+
222+
# CHECK-LABEL: name: test_addri8
223+
# CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
224+
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tADDi8 [[COPY]], 10, 14, $noreg
225+
# CHECK-NEXT: tBcc %bb.2, 2, $cpsr
226+
...
+43
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
# RUN: llc -run-pass=peephole-opt -verify-machineinstrs -o - %s | FileCheck %s
2+
--- |
3+
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
4+
target triple = "thumbv7m-none-none-eabi"
5+
6+
define i32 @test_addir_frameindex(i32 %a) {
7+
%f = alloca i32
8+
ret i32 %a
9+
}
10+
...
11+
---
12+
name: test_addir_frameindex
13+
liveins:
14+
- { reg: '$r0', virtual-reg: '%0' }
15+
stack:
16+
- { id: 0, name: f, type: default, offset: 0, size: 1, alignment: 4,
17+
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
18+
local-offset: -4, debug-info-variable: '', debug-info-expression: '',
19+
debug-info-location: '' }
20+
body: |
21+
bb.0:
22+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
23+
liveins: $r0
24+
25+
%0:rgpr = COPY $r0
26+
%1:gprnopc = t2ADDri %stack.0.f, 0, 14, $noreg, $noreg
27+
t2CMPrr %1, %0, 14, $noreg, implicit-def $cpsr
28+
t2Bcc %bb.2, 3, $cpsr
29+
t2B %bb.1, 14, $noreg
30+
31+
bb.1:
32+
$r0 = COPY %1
33+
tBX_RET 14, $noreg
34+
35+
bb.2:
36+
$r0 = COPY %0
37+
tBX_RET 14, $noreg
38+
39+
# CHECK-LABEL: name: test_addir_frameindex
40+
# CHECK: %1:gprnopc = t2ADDri %stack.0.f, 0, 14, $noreg, $noreg
41+
# CHECK-NEXT: t2CMPrr %1, %0, 14, $noreg, implicit-def $cpsr
42+
# CHECK-NEXT: t2Bcc %bb.2, 3, $cpsr
43+
...

0 commit comments

Comments
 (0)
Please sign in to comment.