Skip to content

Commit ea57c38

Browse files
committed Apr 6, 2017
[AMDGPU] Eliminate barrier if workgroup size is not greater than wavefront size
If the workgroup size is known to be no greater than the wavefront size, the s_barrier instruction is not needed, since all threads are guaranteed to reach the same point at the same time. Differential Revision: https://reviews.llvm.org/D31731 llvm-svn: 299659
1 parent 3fc1225 commit ea57c38

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed
 

‎llvm/lib/Target/AMDGPU/SIISelLowering.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -3159,6 +3159,17 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
31593159
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
31603160
return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
31613161
}
3162+
case Intrinsic::amdgcn_s_barrier: {
3163+
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
3164+
const MachineFunction &MF = DAG.getMachineFunction();
3165+
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
3166+
unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
3167+
if (WGSize <= ST.getWavefrontSize())
3168+
return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
3169+
Op.getOperand(0)), 0);
3170+
}
3171+
return SDValue();
3172+
};
31623173
default:
31633174
return Op;
31643175
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: llc -march=amdgcn < %s | FileCheck %s
2+
3+
; CHECK-LABEL: {{^}}unknown_wgs:
4+
; CHECK: s_barrier
5+
define amdgpu_kernel void @unknown_wgs() {
6+
tail call void @llvm.amdgcn.s.barrier() #0
7+
ret void
8+
}
9+
10+
; CHECK-LABEL: {{^}}flat_wgs_attr_32_128:
11+
; CHECK: s_barrier
12+
define amdgpu_kernel void @flat_wgs_attr_32_128() #1 {
13+
tail call void @llvm.amdgcn.s.barrier() #0
14+
ret void
15+
}
16+
17+
; CHECK-LABEL: {{^}}flat_wgs_attr_32_64:
18+
; CHECK: :
19+
; CHECK-NEXT: ; wave barrier
20+
; CHECK-NEXT: s_endpgm
21+
define amdgpu_kernel void @flat_wgs_attr_32_64() #2 {
22+
tail call void @llvm.amdgcn.s.barrier() #0
23+
ret void
24+
}
25+
26+
declare void @llvm.amdgcn.s.barrier() #0
27+
28+
attributes #0 = { convergent nounwind }
29+
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
30+
attributes #2 = { nounwind "amdgpu-flat-work-group-size"="32,64" }

‎llvm/test/CodeGen/AMDGPU/indirect-private-64.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,4 @@ define amdgpu_kernel void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* n
121121
}
122122

123123
attributes #0 = { convergent nounwind }
124-
attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,64" }
124+
attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" }

0 commit comments

Comments
 (0)
Please sign in to comment.