Skip to content

Commit 234fcb8

Browse files
author
Changpeng Fang
committed Mar 17, 2016
AMDGPU/SI: Do not generate s_waitcnt after ds_permute/ds_bpermute
Summary: ds_permute/ds_bpermute do not read memory, so s_waitcnt is not needed. Reviewers: arsenm, tstellarAMD. Subscribers: llvm-commits, arsenm. Differential Revision: http://reviews.llvm.org/D18197 llvm-svn: 263720
1 parent 79cad85 commit 234fcb8

File tree

3 files changed

+51
-3
lines changed

3 files changed

+51
-3
lines changed
 

‎llvm/lib/Target/AMDGPU/SIInstrInfo.td

+1-1
Original file line numberDiff line numberDiff line change
@@ -2519,7 +2519,7 @@ multiclass DS_1A1D_PERMUTE <bits<8> op, string opName, RegisterClass rc,
25192519
dag ins = (ins VGPR_32:$addr, rc:$data0),
25202520
string asm = opName#" $vdst, $addr, $data0"> {
25212521

2522-
let mayLoad = 0, mayStore = 0, isConvergent = 1 in {
2522+
let LGKM_CNT = 0, mayLoad = 0, mayStore = 0, isConvergent = 1 in {
25232523
def "" : DS_Pseudo <opName, outs, ins,
25242524
[(set (i32 rc:$vdst),
25252525
(node (i32 VGPR_32:$addr), (i32 rc:$data0)))]>;

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll

+24
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,28 @@ define void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind
1010
ret void
1111
}
1212

13+
; Test that no s_waitcnt is emitted after ds_bpermute_b32.
; The function branches on (%cond == 0); only the 'if' arm calls
; llvm.amdgcn.ds.bpermute (with constant 0 operands). The result is merged
; with 0 through a phi in 'endif' and stored to %out.
; The directives below expect s_cbranch_scc1, then ds_bpermute_b32, and
; forbid any s_waitcnt in the output that follows it.
; FUNC-LABEL: {{^}}bpermute_no_waitcnt_test:
14+
; CHECK: s_cbranch_scc1
15+
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
16+
; CHECK-NOT: s_waitcnt
17+
define void @bpermute_no_waitcnt_test(i32 addrspace(1)* %out, i32 %cond) {
18+
entry:
19+
20+
%tmp = icmp eq i32 %cond, 0
21+
br i1 %tmp, label %if, label %else
22+
23+
if: ; preds = %entry
24+
25+
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 0, i32 0) #0
26+
br label %endif
27+
28+
else: ; preds = %entry
29+
br label %endif
30+
31+
endif:
32+
%val = phi i32 [ %bpermute, %if ], [0, %else] ; preds = %else, %if
33+
store i32 %val, i32 addrspace(1)* %out, align 4
34+
ret void
35+
}
36+
1337
attributes #0 = { nounwind readnone convergent }

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll

+26-2
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,33 @@ declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0
55
; FUNC-LABEL: {{^}}ds_permute:
66
; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
77
define void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
8-
%bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
9-
store i32 %bpermute, i32 addrspace(1)* %out, align 4
8+
%permute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
9+
store i32 %permute, i32 addrspace(1)* %out, align 4
1010
ret void
1111
}
1212

13+
; Test that no s_waitcnt is emitted after ds_permute_b32.
; The function branches on (%cond == 0); only the 'if' arm calls
; llvm.amdgcn.ds.permute (with constant 0 operands). The result is merged
; with 0 through a phi in 'endif' and stored to %out.
; The directives below expect s_cbranch_scc1, then ds_permute_b32, and
; forbid any s_waitcnt in the output that follows it.
; FUNC-LABEL: {{^}}permute_no_waitcnt_test:
14+
; CHECK: s_cbranch_scc1
15+
; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
16+
; CHECK-NOT: s_waitcnt
17+
define void @permute_no_waitcnt_test(i32 addrspace(1)* %out, i32 %cond) {
18+
entry:
19+
20+
%tmp = icmp eq i32 %cond, 0
21+
br i1 %tmp, label %if, label %else
22+
23+
if: ; preds = %entry
24+
%permute = call i32 @llvm.amdgcn.ds.permute(i32 0, i32 0) #0
25+
br label %endif
26+
27+
else: ; preds = %entry
28+
br label %endif
29+
30+
endif:
31+
%val = phi i32 [ %permute, %if ], [0, %else] ; preds = %else, %if
32+
store i32 %val, i32 addrspace(1)* %out, align 4
33+
ret void
34+
}
35+
36+
1337
attributes #0 = { nounwind readnone convergent }

0 commit comments

Comments
 (0)
Please sign in to comment.