Skip to content

Commit bb82127

Browse files
author
Eli Friedman
committed Mar 2, 2017
[ARM] Fix insert point for store rescheduling.
In ARMPreAllocLoadStoreOpt::RescheduleOps, LastOp should be the last operation which we want to merge. If we break out of the loop because an operation has the wrong offset, we shouldn't use that operation as LastOp. This patch fixes some cases where we would move stores to the wrong insert point. Re-commit with a fix to increment NumMove in the right place. Differential Revision: https://reviews.llvm.org/D30124 llvm-svn: 296815
1 parent 0a8ada5 commit bb82127

File tree

3 files changed

+76
-25
lines changed

3 files changed

+76
-25
lines changed
 

‎llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp

+19-12
Original file line number | Diff line number | Diff line change
@@ -2161,33 +2161,40 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
21612161
unsigned LastBytes = 0;
21622162
unsigned NumMove = 0;
21632163
for (int i = Ops.size() - 1; i >= 0; --i) {
2164+
// Make sure each operation has the same kind.
21642165
MachineInstr *Op = Ops[i];
2165-
unsigned Loc = MI2LocMap[Op];
2166-
if (Loc <= FirstLoc) {
2167-
FirstLoc = Loc;
2168-
FirstOp = Op;
2169-
}
2170-
if (Loc >= LastLoc) {
2171-
LastLoc = Loc;
2172-
LastOp = Op;
2173-
}
2174-
21752166
unsigned LSMOpcode
21762167
= getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
21772168
if (LastOpcode && LSMOpcode != LastOpcode)
21782169
break;
21792170

2171+
// Check that we have a continuous set of offsets.
21802172
int Offset = getMemoryOpOffset(*Op);
21812173
unsigned Bytes = getLSMultipleTransferSize(Op);
21822174
if (LastBytes) {
21832175
if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
21842176
break;
21852177
}
2178+
2179+
// Don't try to reschedule too many instructions.
2180+
if (NumMove == 8) // FIXME: Tune this limit.
2181+
break;
2182+
2183+
// Found a mergable instruction; save information about it.
2184+
++NumMove;
21862185
LastOffset = Offset;
21872186
LastBytes = Bytes;
21882187
LastOpcode = LSMOpcode;
2189-
if (++NumMove == 8) // FIXME: Tune this limit.
2190-
break;
2188+
2189+
unsigned Loc = MI2LocMap[Op];
2190+
if (Loc <= FirstLoc) {
2191+
FirstLoc = Loc;
2192+
FirstOp = Op;
2193+
}
2194+
if (Loc >= LastLoc) {
2195+
LastLoc = Loc;
2196+
LastOp = Op;
2197+
}
21912198
}
21922199

21932200
if (NumMove <= 1)

‎llvm/test/CodeGen/ARM/ldm-stm-i256.ll

+38
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
; RUN: llc -mtriple=armv7--eabi -verify-machineinstrs < %s | FileCheck %s
2+
3+
; Check the way we schedule/merge a bunch of loads and stores.
4+
; Originally test/CodeGen/ARM/2011-07-07-ScheduleDAGCrash.ll ; now
5+
; being used as a test of optimizations related to ldm/stm.
6+
7+
; FIXME: We could merge more loads/stores with regalloc hints.
8+
; FIXME: Fix scheduling so we don't have 16 live registers.
9+
10+
define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
11+
entry:
12+
%c = load i256, i256* %cc
13+
%d = load i256, i256* %dd
14+
%add = add nsw i256 %c, %d
15+
store i256 %add, i256* %a, align 8
16+
%or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
17+
%add6 = add nsw i256 %or, %d
18+
store i256 %add6, i256* %b, align 8
19+
ret void
20+
; CHECK-DAG: ldm r3
21+
; CHECK-DAG: ldm r2
22+
; CHECK-DAG: ldr {{.*}}, [r3, #20]
23+
; CHECK-DAG: ldr {{.*}}, [r3, #16]
24+
; CHECK-DAG: ldr {{.*}}, [r3, #28]
25+
; CHECK-DAG: ldr {{.*}}, [r3, #24]
26+
; CHECK-DAG: ldr {{.*}}, [r2, #20]
27+
; CHECK-DAG: ldr {{.*}}, [r2, #16]
28+
; CHECK-DAG: ldr {{.*}}, [r2, #28]
29+
; CHECK-DAG: ldr {{.*}}, [r2, #24]
30+
; CHECK-DAG: stmib r0
31+
; CHECK-DAG: str {{.*}}, [r0]
32+
; CHECK-DAG: str {{.*}}, [r0, #24]
33+
; CHECK-DAG: str {{.*}}, [r0, #28]
34+
; CHECK-DAG: str {{.*}}, [r1]
35+
; CHECK-DAG: stmib r1
36+
; CHECK-DAG: str {{.*}}, [r1, #24]
37+
; CHECK-DAG: str {{.*}}, [r1, #28]
38+
}

‎llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir

+19-13
Original file line number | Diff line number | Diff line change
@@ -36,19 +36,22 @@ body: |
3636
t2STRi12 %1, %0, 0, 14, _ :: (store 4)
3737
%10 : rgpr = t2LSLri %2, 1, 14, _, _
3838
t2STRi12 killed %10, %0, 4, 14, _ :: (store 4)
39+
40+
; Make sure we move the paired stores next to each other, and
41+
; insert them in an appropriate location.
42+
; CHECK: t2STRi12 %1,
43+
; CHECK-NEXT: t2STRi12 killed %10,
44+
; CHECK-NEXT: t2MOVi
45+
; CHECK-NEXT: t2ADDrs
46+
3947
%11 : rgpr = t2MOVi 55, 14, _, _
4048
%12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _
4149
t2STRi12 killed %12, %0, 16, 14, _ :: (store 4)
4250
%13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, _, _
4351
t2STRi12 killed %13, %0, 20, 14, _ :: (store 4)
4452
4553
; Make sure we move the paired stores next to each other.
46-
; FIXME: Make sure we don't extend the live-range of a store
47-
; when we don't need to.
48-
; CHECK: t2STRi12 %1,
49-
; CHECK-NEXT: t2STRi12 killed %10,
50-
; CHECK-NEXT: %13 = t2ADDrs %11
51-
; CHECK-NEXT: t2STRi12 killed %12,
54+
; CHECK: t2STRi12 killed %12,
5255
; CHECK-NEXT: t2STRi12 killed %13,
5356
5457
tBX_RET 14, _
@@ -73,6 +76,15 @@ body: |
7376
t2STRi12 killed %10, %0, 4, 14, _ :: (store 4)
7477
%3 : rgpr = t2MUL %2, %2, 14, _
7578
t2STRi12 %3, %0, 8, 14, _ :: (store 4)
79+
80+
; Make sure we move the paired stores next to each other, and
81+
; insert them in an appropriate location.
82+
; CHECK: t2STRi12 {{.*}}, 0
83+
; CHECK-NEXT: t2STRi12 {{.*}}, 4
84+
; CHECK-NEXT: t2STRi12 {{.*}}, 8
85+
; CHECK-NEXT: t2MUL
86+
; CHECK-NEXT: t2MOVi32imm
87+
7688
%4 : rgpr = t2MUL %1, %1, 14, _
7789
%5 : rgpr = t2MOVi32imm -858993459
7890
%6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _
@@ -85,13 +97,7 @@ body: |
8597
t2STRi12 killed %13, %0, 20, 14, _ :: (store 4)
8698
8799
; Make sure we move the paired stores next to each other.
88-
; FIXME: Make sure we don't extend the live-range of a store
89-
; when we don't need to.
90-
; CHECK: t2STRi12 {{.*}}, 0
91-
; CHECK-NEXT: t2STRi12 {{.*}}, 4
92-
; CHECK-NEXT: t2STRi12 {{.*}}, 8
93-
; CHECK-NEXT: t2ADDrs
94-
; CHECK-NEXT: t2STRi12 {{.*}}, 16
100+
; CHECK: t2STRi12 {{.*}}, 16
95101
; CHECK-NEXT: t2STRi12 {{.*}}, 20
96102
97103
tBX_RET 14, _

0 commit comments

Comments
 (0)