Skip to content

Commit fd46bea

Browse files
Hao LiuHao Liu
Hao Liu
authored and
Hao Liu
committedNov 19, 2014
[AArch64] Enable SeparateConstOffsetFromGEP, EarlyCSE and LICM passes on AArch64 backend.
SeparateConstOffsetFromGEP can give more optimization opportunities related to GEPs, which benefits EarlyCSE and LICM. By enabling these passes we can have better address calculations and generate a better addressing mode. Some SPEC 2006 benchmarks (astar, gobmk, namd) have obvious improvements on Cortex-A57. Reviewed in http://reviews.llvm.org/D5864. llvm-svn: 222331
1 parent b508eb2 commit fd46bea

File tree

4 files changed

+183
-2
lines changed

4 files changed

+183
-2
lines changed
 

‎llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
8181
cl::desc("Work around Cortex-A53 erratum 835769"),
8282
cl::init(false));
8383

84+
static cl::opt<bool>
85+
EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
86+
cl::desc("Enable optimizations on complex GEPs"),
87+
cl::init(true));
88+
8489
extern "C" void LLVMInitializeAArch64Target() {
8590
// Register the target.
8691
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@@ -205,6 +210,19 @@ void AArch64PassConfig::addIRPasses() {
205210
addPass(createCFGSimplificationPass());
206211

207212
TargetPassConfig::addIRPasses();
213+
214+
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
215+
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
216+
// and lower a GEP with multiple indices to either arithmetic operations or
217+
// multiple GEPs with single index.
218+
addPass(createSeparateConstOffsetFromGEPPass(TM, true));
219+
// Call EarlyCSE pass to find and remove subexpressions in the lowered
220+
// result.
221+
addPass(createEarlyCSEPass());
222+
// Do loop invariant code motion in case part of the lowered result is
223+
// invariant.
224+
addPass(createLICMPass());
225+
}
208226
}
209227

210228
// Pass Pipeline Configuration
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
; RUN: llc -O3 -verify-machineinstrs %s -o - | FileCheck %s
2+
; RUN: llc -O3 -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
3+
; RUN: llc -O3 -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
4+
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
5+
target triple = "aarch64-linux-gnueabi"
6+
7+
; Following test cases test enabling SeparateConstOffsetFromGEP pass in AArch64
8+
; backend. If useAA() returns true, it will lower a GEP with multiple indices
9+
; into GEPs with a single index, otherwise it will lower it into a
10+
; "ptrtoint+arithmetics+inttoptr" form.
11+
12+
%struct = type { i32, i32, i32, i32, [20 x i32] }
13+
14+
; Check that when two complex GEPs are used in two basic blocks, LLVM can
15+
; eliminate the common subexpression for the second use.
16+
define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
17+
%liberties = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
18+
%1 = load i32* %liberties, align 4
19+
%cmp = icmp eq i32 %1, %lib
20+
br i1 %cmp, label %if.then, label %if.end
21+
22+
if.then: ; preds = %entry
23+
%origin = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
24+
%2 = load i32* %origin, align 4
25+
store i32 %2, i32* %adj, align 4
26+
br label %if.end
27+
28+
if.end: ; preds = %if.then, %entry
29+
ret void
30+
}
31+
32+
; CHECK-LABEL: test_GEP_CSE:
33+
; CHECK: madd
34+
; CHECK: ldr
35+
; CHECK-NOT: madd
36+
; CHECK:ldr
37+
38+
; CHECK-NoAA-LABEL: @test_GEP_CSE(
39+
; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64
40+
; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
41+
; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
42+
; CHECK-NoAA: add i64 [[PTR2]], 23052
43+
; CHECK-NoAA: inttoptr
44+
; CHECK-NoAA: if.then:
45+
; CHECK-NoAA-NOT: ptrtoint
46+
; CHECK-NoAA-NOT: mul
47+
; CHECK-NoAA: add i64 [[PTR2]], 23048
48+
; CHECK-NoAA: inttoptr
49+
50+
; CHECK-UseAA-LABEL: @test_GEP_CSE(
51+
; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8*
52+
; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
53+
; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8* [[PTR0]], i64 [[IDX]]
54+
; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23052
55+
; CHECK-UseAA: bitcast
56+
; CHECK-UseAA: if.then:
57+
; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23048
58+
; CHECK-UseAA: bitcast
59+
60+
%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
61+
%struct.pt = type { %struct.point*, i32, i32 }
62+
%struct.point = type { i32, i32 }
63+
64+
; Check when a GEP is used across two basic blocks, LLVM can sink the address
65+
; calculation and code gen can generate a better addressing mode for the second
66+
; use.
67+
define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
68+
%1 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
69+
%2 = load i32* %1, align 4
70+
%3 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
71+
%4 = load i32* %3, align 4
72+
%5 = icmp eq i32 %2, %4
73+
br i1 %5, label %if.true, label %exit
74+
75+
if.true:
76+
%6 = shl i32 %4, 1
77+
store i32 %6, i32* %3, align 4
78+
br label %exit
79+
80+
exit:
81+
%7 = add nsw i32 %4, 1
82+
store i32 %7, i32* %1, align 4
83+
ret void
84+
}
85+
; CHECK-LABEL: test_GEP_across_BB:
86+
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #528]
87+
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #532]
88+
; CHECK-NOT: add
89+
; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #532]
90+
; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #528]
91+
92+
; CHECK-NoAA-LABEL: test_GEP_across_BB(
93+
; CHECK-NoAA: add i64 [[TMP:%[a-zA-Z0-9]+]], 528
94+
; CHECK-NoAA: add i64 [[TMP]], 532
95+
; CHECK-NoAA: if.true:
96+
; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 532
97+
; CHECK-NoAA: exit:
98+
; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 528
99+
100+
; CHECK-UseAA-LABEL: test_GEP_across_BB(
101+
; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr
102+
; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 528
103+
; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 532
104+
; CHECK-UseAA: if.true:
105+
; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 532
106+
; CHECK-UseAA: exit:
107+
; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 528
108+
109+
%struct.S = type { float, double }
110+
@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
111+
112+
; The following two test cases check we can extract constant from indices of
113+
; struct type.
114+
; The constant offsets are from indices "i64 %idxprom" and "i32 1". As the
115+
; alloca size of %struct.S is 16, and "i32 1" is the 2nd element whose field
116+
; offset is 8, the total constant offset is (5 * 16 + 8) = 88.
117+
define double* @test-struct_1(i32 %i) {
118+
entry:
119+
%add = add nsw i32 %i, 5
120+
%idxprom = sext i32 %add to i64
121+
%p = getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
122+
ret double* %p
123+
}
124+
; CHECK-NoAA-LABEL: @test-struct_1(
125+
; CHECK-NoAA-NOT: getelementptr
126+
; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88
127+
128+
; CHECK-UseAA-LABEL: @test-struct_1(
129+
; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 88
130+
131+
%struct3 = type { i64, i32 }
132+
%struct2 = type { %struct3, i32 }
133+
%struct1 = type { i64, %struct2 }
134+
%struct0 = type { i32, i32, i64*, [100 x %struct1] }
135+
136+
; The constant offsets are from indices "i32 3", "i64 %arrayidx" and "i32 1".
137+
; "i32 3" is the 4th element whose field offset is 16. The alloca size of
138+
; %struct1 is 32. "i32 1" is the 2nd element whose field offset is 8. So the
139+
; total constant offset is 16 + (-2 * 32) + 8 = -40
140+
define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) {
141+
entry:
142+
%arrayidx = add nsw i64 %idx, -2
143+
%ptr2 = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
144+
ret %struct2* %ptr2
145+
}
146+
; CHECK-NoAA-LABEL: @test-struct_2(
147+
; CHECK-NoAA-NOT: = getelementptr
148+
; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40
149+
150+
; CHECK-UseAA-LABEL: @test-struct_2(
151+
; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 -40
152+
153+
; Test that when an index is added from two constants, SeparateConstOffsetFromGEP
154+
; pass does not generate an incorrect result.
155+
define void @test_const_add([3 x i32]* %in) {
156+
%inc = add nsw i32 2, 1
157+
%idxprom = sext i32 %inc to i64
158+
%arrayidx = getelementptr [3 x i32]* %in, i64 %idxprom, i64 2
159+
store i32 0, i32* %arrayidx, align 4
160+
ret void
161+
}
162+
; CHECK-LABEL: test_const_add:
163+
; CHECK: str wzr, [x0, #44]

‎llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s
1+
; RUN: llc -O3 -mtriple arm64-apple-ios3 -aarch64-gep-opt=false %s -o - | FileCheck %s
22
; <rdar://problem/13621857>
33

44
@block = common global i8* null, align 8

‎llvm/test/CodeGen/AArch64/arm64-cse.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 | FileCheck %s
1+
; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 -aarch64-gep-opt=false | FileCheck %s
22
target triple = "arm64-apple-ios"
33

44
; rdar://12462006

0 commit comments

Comments
 (0)
Please sign in to comment.