Skip to content

Commit 3d2efdf

Browse files
committed Oct 9, 2018
Recommit r343993: [X86] condition branches folding for three-way conditional codes
Fix the memory issue exposed by sanitizer. llvm-svn: 344085
1 parent bea5967 commit 3d2efdf

9 files changed

+947
-0
lines changed
 

‎llvm/lib/Target/X86/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ set(sources
2727
X86CallingConv.cpp
2828
X86CallLowering.cpp
2929
X86CmovConversion.cpp
30+
X86CondBrFolding.cpp
3031
X86DomainReassignment.cpp
3132
X86ExpandPseudo.cpp
3233
X86FastISel.cpp

‎llvm/lib/Target/X86/X86.h

+3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ FunctionPass *createX86OptimizeLEAs();
7575
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
7676
FunctionPass *createX86FixupSetCC();
7777

78+
/// Return a pass that folds conditional branch jumps.
79+
FunctionPass *createX86CondBrFolding();
80+
7881
/// Return a pass that avoids creating store forward block issues in the hardware.
7982
FunctionPass *createX86AvoidStoreForwardingBlocks();
8083

‎llvm/lib/Target/X86/X86.td

+7
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,12 @@ def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
404404
"Indicates that the BEXTR instruction is implemented as a single uop "
405405
"with good throughput.">;
406406

407+
// Merge branches using three-way conditional code.
408+
def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
409+
"ThreewayBranchProfitable", "true",
410+
"Merge branches to a three-way "
411+
"conditional branch">;
412+
407413
//===----------------------------------------------------------------------===//
408414
// Register File Description
409415
//===----------------------------------------------------------------------===//
@@ -732,6 +738,7 @@ def SNBFeatures : ProcessorFeatures<[], [
732738
FeatureFastScalarFSQRT,
733739
FeatureFastSHLDRotate,
734740
FeatureSlowIncDec,
741+
FeatureMergeToThreeWayBranch,
735742
FeatureMacroFusion
736743
]>;
737744

‎llvm/lib/Target/X86/X86CondBrFolding.cpp

+579
Large diffs are not rendered by default.

‎llvm/lib/Target/X86/X86Subtarget.h

+4
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
419419
/// Indicates target prefers 256 bit instructions.
420420
bool Prefer256Bit = false;
421421

422+
/// Threeway branch is profitable in this subtarget.
423+
bool ThreewayBranchProfitable = false;
424+
422425
/// What processor and OS we're targeting.
423426
Triple TargetTriple;
424427

@@ -662,6 +665,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
662665
bool hasWAITPKG() const { return HasWAITPKG; }
663666
bool hasPCONFIG() const { return HasPCONFIG; }
664667
bool hasSGX() const { return HasSGX; }
668+
bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
665669
bool hasINVPCID() const { return HasINVPCID; }
666670
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
667671
bool useRetpolineIndirectBranches() const {

‎llvm/lib/Target/X86/X86TargetMachine.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
5454
cl::desc("Enable the machine combiner pass"),
5555
cl::init(true), cl::Hidden);
5656

57+
static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding",
58+
cl::desc("Enable the conditional branch "
59+
"folding pass"),
60+
cl::init(true), cl::Hidden);
61+
5762
namespace llvm {
5863

5964
void initializeWinEHStatePassPass(PassRegistry &);
@@ -447,6 +452,8 @@ bool X86PassConfig::addGlobalInstructionSelect() {
447452
}
448453

449454
bool X86PassConfig::addILPOpts() {
455+
if (EnableCondBrFoldingPass)
456+
addPass(createX86CondBrFolding());
450457
addPass(&EarlyIfConverterID);
451458
if (EnableMachineCombinerPass)
452459
addPass(&MachineCombinerID);

‎llvm/test/CodeGen/X86/O3-pipeline.ll

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
; CHECK-NEXT: Merge disjoint stack slots
7373
; CHECK-NEXT: Local Stack Slot Allocation
7474
; CHECK-NEXT: Remove dead machine instructions
75+
; CHECK-NEXT: X86 CondBr Folding
7576
; CHECK-NEXT: MachineDominator Tree Construction
7677
; CHECK-NEXT: Machine Natural Loop Construction
7778
; CHECK-NEXT: Machine Trace Metrics

‎llvm/test/CodeGen/X86/condbr_if.ll

+178
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=sandybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
2+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=ivybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
3+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=haswell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
4+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=broadwell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
5+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skylake %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
6+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
7+
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=NOTMERGE
8+
9+
define i32 @length2_1(i32) {
10+
%2 = icmp slt i32 %0, 3
11+
br i1 %2, label %3, label %5
12+
13+
; <label>:3:
14+
%4 = tail call i32 (...) @f1()
15+
br label %13
16+
17+
; <label>:5:
18+
%6 = icmp slt i32 %0, 40
19+
br i1 %6, label %7, label %13
20+
21+
; <label>:7:
22+
%8 = icmp eq i32 %0, 3
23+
br i1 %8, label %9, label %11
24+
25+
; <label>:9:
26+
%10 = tail call i32 (...) @f2()
27+
br label %11
28+
29+
; <label>:11:
30+
%12 = tail call i32 (...) @f3() #2
31+
br label %13
32+
33+
; <label>:13:
34+
ret i32 0
35+
}
36+
; MERGE-LABEL: length2_1
37+
; MERGE: cmpl $3
38+
; MERGE-NEXT: jg
39+
; MERGE-NEXT: jge
40+
; NOTMERGE-LABEL: length2_1
41+
; NOTMERGE: cmpl $2
42+
; NOTMERGE-NEXT: jg
43+
44+
define i32 @length2_2(i32) {
45+
%2 = icmp sle i32 %0, 2
46+
br i1 %2, label %3, label %5
47+
48+
; <label>:3:
49+
%4 = tail call i32 (...) @f1()
50+
br label %13
51+
52+
; <label>:5:
53+
%6 = icmp slt i32 %0, 40
54+
br i1 %6, label %7, label %13
55+
56+
; <label>:7:
57+
%8 = icmp eq i32 %0, 3
58+
br i1 %8, label %9, label %11
59+
60+
; <label>:9:
61+
%10 = tail call i32 (...) @f2()
62+
br label %11
63+
64+
; <label>:11:
65+
%12 = tail call i32 (...) @f3() #2
66+
br label %13
67+
68+
; <label>:13:
69+
ret i32 0
70+
}
71+
; MERGE-LABEL: length2_2
72+
; MERGE: cmpl $3
73+
; MERGE-NEXT: jg
74+
; MERGE-NEXT: jge
75+
; NOTMERGE-LABEL: length2_2
76+
; NOTMERGE: cmpl $2
77+
; NOTMERGE-NEXT: jg
78+
79+
define i32 @length2_3(i32) {
80+
%2 = icmp sgt i32 %0, 3
81+
br i1 %2, label %3, label %5
82+
83+
; <label>:3:
84+
%4 = tail call i32 (...) @f1()
85+
br label %13
86+
87+
; <label>:5:
88+
%6 = icmp sgt i32 %0, -40
89+
br i1 %6, label %7, label %13
90+
91+
; <label>:7:
92+
%8 = icmp eq i32 %0, 3
93+
br i1 %8, label %9, label %11
94+
95+
; <label>:9:
96+
%10 = tail call i32 (...) @f2()
97+
br label %11
98+
99+
; <label>:11:
100+
%12 = tail call i32 (...) @f3() #2
101+
br label %13
102+
103+
; <label>:13:
104+
ret i32 0
105+
}
106+
; MERGE-LABEL: length2_3
107+
; MERGE: cmpl $3
108+
; MERGE-NEXT: jl
109+
; MERGE-NEXT: jle
110+
; NOTMERGE-LABEL: length2_3
111+
; NOTMERGE: cmpl $4
112+
; NOTMERGE-NEXT: jl
113+
114+
define i32 @length2_4(i32) {
115+
%2 = icmp sge i32 %0, 4
116+
br i1 %2, label %3, label %5
117+
118+
; <label>:3:
119+
%4 = tail call i32 (...) @f1()
120+
br label %13
121+
122+
; <label>:5:
123+
%6 = icmp sgt i32 %0, -40
124+
br i1 %6, label %7, label %13
125+
126+
; <label>:7:
127+
%8 = icmp eq i32 %0, 3
128+
br i1 %8, label %9, label %11
129+
130+
; <label>:9:
131+
%10 = tail call i32 (...) @f2()
132+
br label %11
133+
134+
; <label>:11:
135+
%12 = tail call i32 (...) @f3() #2
136+
br label %13
137+
138+
; <label>:13:
139+
ret i32 0
140+
}
141+
; MERGE-LABEL: length2_4
142+
; MERGE: cmpl $3
143+
; MERGE-NEXT: jl
144+
; MERGE-NEXT: jle
145+
; NOTMERGE-LABEL: length2_4
146+
; NOTMERGE: cmpl $4
147+
; NOTMERGE-NEXT: jl
148+
149+
declare i32 @f1(...)
150+
declare i32 @f2(...)
151+
declare i32 @f3(...)
152+
153+
define i32 @length1_1(i32) {
154+
%2 = icmp sgt i32 %0, 5
155+
br i1 %2, label %3, label %5
156+
157+
; <label>:3:
158+
%4 = tail call i32 (...) @f1()
159+
br label %9
160+
161+
; <label>:5:
162+
%6 = icmp eq i32 %0, 5
163+
br i1 %6, label %7, label %9
164+
165+
; <label>:7:
166+
%8 = tail call i32 (...) @f2()
167+
br label %9
168+
169+
; <label>:9:
170+
ret i32 0
171+
}
172+
; MERGE-LABEL: length1_1
173+
; MERGE: cmpl $5
174+
; MERGE-NEXT: jl
175+
; MERGE-NEXT: jle
176+
; NOTMERGE-LABEL: length1_1
177+
; NOTMERGE: cmpl $6
178+
; NOTMERGE-NEXT: jl
llvm/test/CodeGen/X86/condbr_switch.ll

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=sandybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
2+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=ivybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
3+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=haswell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
4+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=broadwell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
5+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skylake %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
6+
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
7+
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=NOTMERGE
8+
9+
@v1 = common dso_local local_unnamed_addr global i32 0, align 4
10+
@v2 = common dso_local local_unnamed_addr global i32 0, align 4
11+
@v3 = common dso_local local_unnamed_addr global i32 0, align 4
12+
@v4 = common dso_local local_unnamed_addr global i32 0, align 4
13+
@v5 = common dso_local local_unnamed_addr global i32 0, align 4
14+
@v6 = common dso_local local_unnamed_addr global i32 0, align 4
15+
@v7 = common dso_local local_unnamed_addr global i32 0, align 4
16+
@v8 = common dso_local local_unnamed_addr global i32 0, align 4
17+
@v9 = common dso_local local_unnamed_addr global i32 0, align 4
18+
@v10 = common dso_local local_unnamed_addr global i32 0, align 4
19+
@v11 = common dso_local local_unnamed_addr global i32 0, align 4
20+
@v12 = common dso_local local_unnamed_addr global i32 0, align 4
21+
@v13 = common dso_local local_unnamed_addr global i32 0, align 4
22+
@v14 = common dso_local local_unnamed_addr global i32 0, align 4
23+
@v15 = common dso_local local_unnamed_addr global i32 0, align 4
24+
25+
define dso_local i32 @fourcases(i32 %n) {
26+
entry:
27+
switch i32 %n, label %return [
28+
i32 111, label %sw.bb
29+
i32 222, label %sw.bb1
30+
i32 3665, label %sw.bb2
31+
i32 4444, label %sw.bb4
32+
]
33+
34+
sw.bb:
35+
%0 = load i32, i32* @v1, align 4
36+
br label %return
37+
38+
sw.bb1:
39+
%1 = load i32, i32* @v2, align 4
40+
%add = add nsw i32 %1, 12
41+
br label %return
42+
43+
sw.bb2:
44+
%2 = load i32, i32* @v3, align 4
45+
%add3 = add nsw i32 %2, 13
46+
br label %return
47+
48+
sw.bb4:
49+
%3 = load i32, i32* @v1, align 4
50+
%4 = load i32, i32* @v2, align 4
51+
%add5 = add nsw i32 %4, %3
52+
br label %return
53+
54+
return:
55+
%retval.0 = phi i32 [ %add5, %sw.bb4 ], [ %add3, %sw.bb2 ], [ %add, %sw.bb1 ], [ %0, %sw.bb ], [ 0, %entry ]
56+
ret i32 %retval.0
57+
}
58+
; MERGE-LABEL: fourcases
59+
; MERGE: cmpl $3665
60+
; MERGE-NEXT: jg
61+
; MERGE-NEXT: jge
62+
; NOTMERGE: cmpl $3664
63+
; NOTMERGE-NEXT: jg
64+
65+
define dso_local i32 @fifteencases(i32) {
66+
switch i32 %0, label %32 [
67+
i32 -111, label %2
68+
i32 -13, label %4
69+
i32 25, label %6
70+
i32 37, label %8
71+
i32 89, label %10
72+
i32 111, label %12
73+
i32 213, label %14
74+
i32 271, label %16
75+
i32 283, label %18
76+
i32 325, label %20
77+
i32 327, label %22
78+
i32 429, label %24
79+
i32 500, label %26
80+
i32 603, label %28
81+
i32 605, label %30
82+
]
83+
84+
; <label>:2
85+
%3 = load i32, i32* @v1, align 4
86+
br label %32
87+
88+
; <label>:4
89+
%5 = load i32, i32* @v2, align 4
90+
br label %32
91+
92+
; <label>:6
93+
%7 = load i32, i32* @v3, align 4
94+
br label %32
95+
96+
; <label>:8
97+
%9 = load i32, i32* @v4, align 4
98+
br label %32
99+
100+
; <label>:10
101+
%11 = load i32, i32* @v5, align 4
102+
br label %32
103+
104+
; <label>:12
105+
%13 = load i32, i32* @v6, align 4
106+
br label %32
107+
108+
; <label>:14
109+
%15 = load i32, i32* @v7, align 4
110+
br label %32
111+
112+
; <label>:16
113+
%17 = load i32, i32* @v8, align 4
114+
br label %32
115+
116+
; <label>:18
117+
%19 = load i32, i32* @v9, align 4
118+
br label %32
119+
120+
; <label>:20
121+
%21 = load i32, i32* @v10, align 4
122+
br label %32
123+
124+
; <label>:22
125+
%23 = load i32, i32* @v11, align 4
126+
br label %32
127+
128+
; <label>:24
129+
%25 = load i32, i32* @v12, align 4
130+
br label %32
131+
132+
; <label>:26
133+
%27 = load i32, i32* @v13, align 4
134+
br label %32
135+
136+
; <label>:28:
137+
%29 = load i32, i32* @v14, align 4
138+
br label %32
139+
140+
; <label>:30:
141+
%31 = load i32, i32* @v15, align 4
142+
br label %32
143+
144+
; <label>:32:
145+
%33 = phi i32 [ %31, %30 ], [ %29, %28 ], [ %27, %26 ], [ %25, %24 ], [ %23, %22 ], [ %21, %20 ], [ %19, %18 ], [ %17, %16 ], [ %15, %14 ], [ %13, %12 ], [ %11, %10 ], [ %9, %8 ], [ %7, %6 ], [ %5, %4 ], [ %3, %2 ], [ 0, %1 ]
146+
ret i32 %33
147+
}
148+
; MERGE-LABEL: fifteencases
149+
; MERGE: cmpl $271
150+
; MERGE-NEXT: jg
151+
; MERGE-NEXT: jge
152+
; MERGE: cmpl $37
153+
; MERGE-NEXT: jg
154+
; MERGE-NEXT: jge
155+
; MERGE: cmpl $429
156+
; MERGE-NEXT: jg
157+
; MERGE-NEXT: jge
158+
; MERGE: cmpl $325
159+
; MERGE-NEXT: jg
160+
; MERGE-NEXT: jge
161+
; MERGE: cmpl $603
162+
; MERGE-NEXT: jg
163+
; MERGE-NEXT: jge
164+
; NOTMERGE-LABEL: fifteencases
165+
; NOTMERGE: cmpl $270
166+
; NOTMERGE-NEXT: jle
167+

0 commit comments

Comments
 (0)
Please sign in to comment.