Skip to content

Commit 9f4530b

Browse files
committed Jul 24, 2014
[SDAG] Introduce a combined set to the DAG combiner which tracks nodes
which have successfully round-tripped through the combine phase, and use this to ensure all operands to DAG nodes are visited by the combiner, even if they are only added during the combine phase. This is critical for the combiner to reach nodes that are *introduced* during combining. Previously these would sometimes be visited and sometimes not be visited based on whether they happened to end up on the worklist or not. Now we always run them through the combiner. This fixes quite a few bad codegen test cases lurking in the suite while also being more principled. Among these, the TLS code generation is particularly exciting for programs that have this in the critical path like TSan-instrumented binaries (although I think they engineer to use a different TLS that is faster anyway). I've tried to check for compile-time regressions here by running llc over a merged (but not LTO-ed) clang bitcode file and observed at most a 3% slowdown in llc. Given that this is essentially a worst case (neither opt nor clang is running at this phase) I think this is tolerable. The actual LTO case should be even less costly, and the cost in normal compilation should be negligible. With this combining logic, it is possible to re-legalize as we combine, which is necessary to implement PSHUFB formation on x86 as a post-legalize DAG combine (my ultimate goal). Differential Revision: http://reviews.llvm.org/D4638 llvm-svn: 213898
1 parent 80b8694 commit 9f4530b

19 files changed

+95
-137
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+21-5
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ namespace {
104104
/// stable indices of nodes within the worklist.
105105
DenseMap<SDNode *, unsigned> WorklistMap;
106106

107+
/// \brief Set of nodes which have been combined (at least once).
108+
///
109+
/// This is used to allow us to reliably add any operands of a DAG node
110+
/// which have not yet been combined to the worklist.
111+
SmallPtrSet<SDNode *, 64> CombinedNodes;
112+
107113
// AA - Used for DAG load/store alias analysis.
108114
AliasAnalysis &AA;
109115

@@ -136,6 +142,8 @@ namespace {
136142
/// removeFromWorklist - remove all instances of N from the worklist.
137143
///
138144
void removeFromWorklist(SDNode *N) {
145+
CombinedNodes.erase(N);
146+
139147
auto It = WorklistMap.find(N);
140148
if (It == WorklistMap.end())
141149
return; // Not in the worklist.
@@ -1152,6 +1160,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
11521160
if (recursivelyDeleteUnusedNodes(N))
11531161
continue;
11541162

1163+
DEBUG(dbgs() << "\nCombining: ";
1164+
N->dump(&DAG));
1165+
1166+
// Add any operands of the new node which have not yet been combined to the
1167+
// worklist as well. Because the worklist uniques things already, this
1168+
// won't repeatedly process the same operand.
1169+
CombinedNodes.insert(N);
1170+
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1171+
if (!CombinedNodes.count(N->getOperand(i).getNode()))
1172+
AddToWorklist(N->getOperand(i).getNode());
1173+
11551174
WorklistRemover DeadNodes(*this);
11561175

11571176
SDValue RV = combine(N);
@@ -1172,11 +1191,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
11721191
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
11731192
"Node was deleted but visit returned new node!");
11741193

1175-
DEBUG(dbgs() << "\nReplacing.3 ";
1176-
N->dump(&DAG);
1177-
dbgs() << "\nWith: ";
1178-
RV.getNode()->dump(&DAG);
1179-
dbgs() << '\n');
1194+
DEBUG(dbgs() << " ... into: ";
1195+
RV.getNode()->dump(&DAG));
11801196

11811197
// Transfer debug value.
11821198
DAG.TransferDbgValues(SDValue(N, 0), RV);

‎llvm/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll

-46
This file was deleted.

‎llvm/test/CodeGen/ARM/aapcs-hfa-code.ll

+2-4
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,10 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
9292
call arm_aapcs_vfpcc void @test_1double_misaligned([4 x double] undef, [4 x double] undef, float undef, double 1.0)
9393

9494
; CHECK-LABEL: test_1double_misaligned:
95-
; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
96-
; CHECK-DAG: mov r[[BASE:[0-9]+]], sp
9795
; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0
96+
; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
9897
; CHECK-DAG: movt [[ONEHI]], #16368
99-
; CHECK-DAG: str [[ONELO]], [r[[BASE]], #8]!
100-
; CHECK-DAG: str [[ONEHI]], [r[[BASE]], #4]
98+
; CHECK-DAG: strd [[ONELO]], [[ONEHI]], [sp, #8]
10199

102100
; CHECK-M4F-LABEL: test_1double_misaligned:
103101
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0

‎llvm/test/CodeGen/Mips/cmov.ll

+6-21
Original file line numberDiff line numberDiff line change
@@ -757,24 +757,9 @@ define i32 @slti6(i32 %a) nounwind readnone {
757757

758758
; ALL-LABEL: slti6:
759759

760-
; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
761-
; 32-CMOV-DAG: xori [[R1]], [[R1]], 1
762-
; 32-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
763-
; 32-CMOV-NOT: movn
764-
765-
; 32-CMP-DAG: slti [[R1:\$[0-9]+]], $4, 7
766-
; 32-CMP-DAG: xori [[R1]], [[R1]], 1
767-
; 32-CMP-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
768-
; 32-CMP-NOT: seleqz
769-
; 32-CMP-NOT: selnez
770-
771-
; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
772-
; 64-CMOV-DAG: xori [[R1]], [[R1]], 1
773-
; 64-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
774-
; 64-CMOV-NOT: movn
775-
776-
; 64-CMP-DAG: slti [[R1:\$[0-9]+]], $4, 7
777-
; 64-CMP-DAG: xori [[R1]], [[R1]], 1
778-
; 64-CMP-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
779-
; 64-CMP-NOT: seleqz
780-
; 64-CMP-NOT: selnez
760+
; ALL-DAG: addiu [[R1:\$[0-9]+]], $zero, 6
761+
; ALL-DAG: slt [[R1]], [[R1]], $4
762+
; ALL-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
763+
; ALL-NOT: movn
764+
; ALL-NOT: seleqz
765+
; ALL-NOT: selnez

‎llvm/test/CodeGen/R600/add_i64.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
7070
}
7171

7272
; SI-LABEL: @trunc_i64_add_to_i32
73-
; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
74-
; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
73+
; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
74+
; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
7575
; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
7676
; SI-NOT: ADDC
7777
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]

‎llvm/test/CodeGen/R600/or.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,10 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
116116
}
117117

118118
; SI-LABEL: @trunc_i64_or_to_i32
119-
; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
120-
; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
121-
; SI: S_OR_B32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
122-
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
119+
; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
120+
; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
121+
; SI: S_OR_B32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
122+
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
123123
; SI: BUFFER_STORE_DWORD [[VRESULT]],
124124
define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
125125
%add = or i64 %b, %a

‎llvm/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ entry:
77
%tmp1 = bitcast double %a to <8 x i8>
88
%tmp2 = bitcast double %b to <8 x i8>
99
%tmp3 = add <8 x i8> %tmp1, %tmp2
10-
; CHECK: paddw
10+
; CHECK: paddb
1111
store <8 x i8> %tmp3, <8 x i8>* null
1212
ret void
1313
}
@@ -18,7 +18,7 @@ entry:
1818
%tmp1 = bitcast double %a to <4 x i16>
1919
%tmp2 = bitcast double %b to <4 x i16>
2020
%tmp3 = add <4 x i16> %tmp1, %tmp2
21-
; CHECK: paddd
21+
; CHECK: paddw
2222
store <4 x i16> %tmp3, <4 x i16>* null
2323
ret void
2424
}
@@ -29,7 +29,7 @@ entry:
2929
%tmp1 = bitcast double %a to <2 x i32>
3030
%tmp2 = bitcast double %b to <2 x i32>
3131
%tmp3 = add <2 x i32> %tmp1, %tmp2
32-
; CHECK: paddq
32+
; CHECK: paddd
3333
store <2 x i32> %tmp3, <2 x i32>* null
3434
ret void
3535
}

‎llvm/test/CodeGen/X86/i8-umulo.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
55
define i8 @testumulo(i32 %argc) {
6-
; CHECK: imulw
6+
; CHECK: imull
77
; CHECK: testb %{{.+}}, %{{.+}}
88
; CHECK: je [[NOOVERFLOWLABEL:.+]]
99
; CHECK: {{.*}}[[NOOVERFLOWLABEL]]:

‎llvm/test/CodeGen/X86/jump_sign.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ entry:
284284
define i32 @func_test1(i32 %p1) nounwind uwtable {
285285
entry:
286286
; CHECK-LABEL: func_test1:
287-
; CHECK: testb
287+
; CHECK: andb
288288
; CHECK: j
289289
; CHECK: ret
290290
%0 = load i32* @b, align 4

‎llvm/test/CodeGen/X86/lower-bitcast.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,13 @@ define i64 @test4(i64 %A) {
6868
%2 = bitcast <2 x i32> %add to i64
6969
ret i64 %2
7070
}
71-
; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
71+
; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
7272
; Ideally, we should fold that sequence into a single paddd. This is fixed with
7373
; the widening legalization.
7474
;
7575
; CHECK-LABEL: test4
7676
; CHECK: pshufd
77-
; CHECK-NEXT: paddq
77+
; CHECK-NEXT: paddd
7878
; CHECK-NEXT: pshufd
7979
; CHECK: ret
8080
;

‎llvm/test/CodeGen/X86/pr15267.ll

+18-15
Original file line numberDiff line numberDiff line change
@@ -48,19 +48,22 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
4848

4949
; CHECK: test3
5050
; CHECK: movzbl
51-
; CHECK: shrl
52-
; CHECK: andl $1
53-
; CHECK: andl $1
54-
; CHECK: vmovd
55-
; CHECK: pinsrd $1
56-
; CHECK: shrl $2
57-
; CHECK: andl $1
58-
; CHECK: pinsrd $2
59-
; CHECK: shrl $3
60-
; CHECK: andl $1
61-
; CHECK: pinsrd $3
62-
; CHECK: pslld
63-
; CHECK: psrad
64-
; CHECK: pmovsxdq
65-
; CHECK: pmovsxdq
51+
; CHECK: movq
52+
; CHECK: shlq
53+
; CHECK: sarq
54+
; CHECK: vmovq
55+
; CHECK: movq
56+
; CHECK: shlq
57+
; CHECK: sarq
58+
; CHECK: vmovq
59+
; CHECK: vpunpcklqdq
60+
; CHECK: movq
61+
; CHECK: shlq
62+
; CHECK: sarq
63+
; CHECK: vmovq
64+
; CHECK: shlq
65+
; CHECK: sarq
66+
; CHECK: vmovq
67+
; CHECK: vpunpcklqdq
68+
; CHECK: vinsertf128
6669
; CHECK: ret

‎llvm/test/CodeGen/X86/store-narrow.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ entry:
3434
; X64: movb %sil, 1(%rdi)
3535

3636
; X32-LABEL: test2:
37-
; X32: movzbl 8(%esp), %e[[REG:[abcd]]]x
37+
; X32: movb 8(%esp), %[[REG:[abcd]]]l
3838
; X32: movb %[[REG]]l, 1(%{{.*}})
3939
}
4040

@@ -67,8 +67,8 @@ entry:
6767
; X64: movw %si, 2(%rdi)
6868

6969
; X32-LABEL: test4:
70-
; X32: movl 8(%esp), %e[[REG:[abcd]x]]
71-
; X32: movw %[[REG]], 2(%{{.*}})
70+
; X32: movw 8(%esp), %[[REG:[abcd]]]x
71+
; X32: movw %[[REG]]x, 2(%{{.*}})
7272
}
7373

7474
define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -84,8 +84,8 @@ entry:
8484
; X64: movw %si, 2(%rdi)
8585

8686
; X32-LABEL: test5:
87-
; X32: movzwl 8(%esp), %e[[REG:[abcd]x]]
88-
; X32: movw %[[REG]], 2(%{{.*}})
87+
; X32: movw 8(%esp), %[[REG:[abcd]]]x
88+
; X32: movw %[[REG]]x, 2(%{{.*}})
8989
}
9090

9191
define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {

‎llvm/test/CodeGen/X86/trunc-ext-ld-st.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ define void @load_2_i16(<2 x i16>* %A) {
3232

3333
;CHECK-LABEL: load_2_i32:
3434
;CHECK: pmovzxdq
35-
;CHECK: paddq
35+
;CHECK: paddd
3636
;CHECK: pshufd
3737
;CHECK: ret
3838
define void @load_2_i32(<2 x i32>* %A) {
@@ -56,7 +56,7 @@ define void @load_4_i8(<4 x i8>* %A) {
5656

5757
;CHECK-LABEL: load_4_i16:
5858
;CHECK: pmovzxwd
59-
;CHECK: paddd
59+
;CHECK: paddw
6060
;CHECK: pshufb
6161
;CHECK: ret
6262
define void @load_4_i16(<4 x i16>* %A) {
@@ -68,7 +68,7 @@ define void @load_4_i16(<4 x i16>* %A) {
6868

6969
;CHECK-LABEL: load_8_i8:
7070
;CHECK: pmovzxbw
71-
;CHECK: paddw
71+
;CHECK: paddb
7272
;CHECK: pshufb
7373
;CHECK: ret
7474
define void @load_8_i8(<8 x i8>* %A) {

‎llvm/test/CodeGen/X86/vector-idiv.ll

+9-5
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
122122
; SSE41-LABEL: test8:
123123
; SSE41: pmuldq
124124
; SSE41: pshufd $49
125-
; SSE41-NOT: pshufd $49
125+
; SSE41: pshufd $49
126126
; SSE41: pmuldq
127127
; SSE41: shufps $-35
128128
; SSE41: pshufd $-40
@@ -134,7 +134,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
134134
; SSE-LABEL: test8:
135135
; SSE: pmuludq
136136
; SSE: pshufd $49
137-
; SSE-NOT: pshufd $49
137+
; SSE: pshufd $49
138138
; SSE: pmuludq
139139
; SSE: shufps $-35
140140
; SSE: pshufd $-40
@@ -147,7 +147,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
147147
; AVX-LABEL: test8:
148148
; AVX: vpmuldq
149149
; AVX: vpshufd $49
150-
; AVX-NOT: vpshufd $49
150+
; AVX: vpshufd $49
151151
; AVX: vpmuldq
152152
; AVX: vshufps $-35
153153
; AVX: vpshufd $-40
@@ -162,10 +162,12 @@ define <8 x i32> @test9(<8 x i32> %a) {
162162
ret <8 x i32> %div
163163

164164
; AVX-LABEL: test9:
165-
; AVX: vpalignr $4
166165
; AVX: vpbroadcastd
166+
; AVX: vpalignr $4
167+
; AVX: vpalignr $4
167168
; AVX: vpmuldq
168169
; AVX: vpmuldq
170+
; AVX: vpalignr $4
169171
; AVX: vpblendd $170
170172
; AVX: vpadd
171173
; AVX: vpsrld $31
@@ -195,10 +197,12 @@ define <8 x i32> @test11(<8 x i32> %a) {
195197
ret <8 x i32> %rem
196198

197199
; AVX-LABEL: test11:
198-
; AVX: vpalignr $4
199200
; AVX: vpbroadcastd
201+
; AVX: vpalignr $4
202+
; AVX: vpalignr $4
200203
; AVX: vpmuldq
201204
; AVX: vpmuldq
205+
; AVX: vpalignr $4
202206
; AVX: vpblendd $170
203207
; AVX: vpadd
204208
; AVX: vpsrld $31

‎llvm/test/CodeGen/X86/widen_cast-1.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
33

44
; CHECK: movl
5-
; CHECK: paddd
5+
; CHECK: paddw
66
; CHECK: movlpd
77

88
; Scheduler causes produce a different instruction order
99
; ATOM: movl
10-
; ATOM: paddd
10+
; ATOM: paddw
1111
; ATOM: movlpd
1212

1313
; bitcast a v4i16 to v2i32

0 commit comments

Comments
 (0)
Please sign in to comment.