# Patch summary: introduce the intrinsic int_amdgcn_s_barrier and make it the
# pattern that the S_BARRIER instruction definition matches directly.
#
#   * IntrinsicsAMDGPU.td: defines int_amdgcn_s_barrier inside the
#     TargetPrefix = "amdgcn" block, with GCC builtin name
#     __builtin_amdgcn_s_barrier, no results, no operands, and the
#     IntrConvergent property.
#   * SIInstructions.td: S_BARRIER's inline pattern changes from
#     int_AMDGPU_barrier_local to int_amdgcn_s_barrier; int_AMDGPU_barrier_local
#     is instead selected to S_BARRIER by a standalone Pat placed next to the
#     existing int_AMDGPU_barrier_global one.
#   * llvm.amdgcn.s.barrier.ll: new llc/FileCheck test for the intrinsic.
#
# NOTE(review): this patch was recovered from a whitespace-collapsed copy. Line
# breaks, blank context lines and leading indentation were reconstructed so that
# every hunk matches its @@ line counts; they have not been checked against the
# target tree. Confirm the context lines, or apply with whitespace-insensitive
# matching (e.g. `patch -l`).
Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -65,6 +65,9 @@
 
 let TargetPrefix = "amdgcn" in {
 
+def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
+  Intrinsic<[], [], [IntrConvergent]>;
+
 def int_amdgcn_div_scale : GCCBuiltin<"__builtin_amdgcn_div_scale">,
   // 1st parameter: Numerator
   // 2nd parameter: Denominator
# SIInstructions.td, first hunk: only the selection pattern attached to
# S_BARRIER changes. Second hunk: adds the standalone Pat for
# int_AMDGPU_barrier_local and a FIXME above the two standalone barrier
# patterns saying they should be removed eventually.
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -481,7 +481,7 @@
 
 let hasSideEffects = 1 in {
   def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
-  [(int_AMDGPU_barrier_local)]
+  [(int_amdgcn_s_barrier)]
   > {
     let SchedRW = [WriteBarrier];
     let simm16 = 0;
@@ -2167,11 +2167,17 @@
 // SOPP Patterns
 //===----------------------------------------------------------------------===//
 
+// FIXME: These should be removed eventually
 def : Pat <
   (int_AMDGPU_barrier_global),
   (S_BARRIER)
 >;
 
+def : Pat <
+  (int_AMDGPU_barrier_local),
+  (S_BARRIER)
+>;
+
 //===----------------------------------------------------------------------===//
 // VOP1 Patterns
 //===----------------------------------------------------------------------===//
# New test file. @test_barrier stores to %out, calls @llvm.amdgcn.s.barrier,
# then loads from and stores to %out again. The GCN check lines require
# buffer_store_dword, s_waitcnt and s_barrier to appear in that order after the
# test_barrier label in llc's output for -march=amdgcn.
Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test_barrier:
+; GCN: buffer_store_dword
+; GCN: s_waitcnt
+; GCN: s_barrier
+define void @test_barrier(i32 addrspace(1)* %out) #0 {
+entry:
+  %tmp = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp
+  store i32 %tmp, i32 addrspace(1)* %tmp1
+  call void @llvm.amdgcn.s.barrier()
+  %tmp2 = call i32 @llvm.r600.read.local.size.x()
+  %tmp3 = sub i32 %tmp2, 1
+  %tmp4 = sub i32 %tmp3, %tmp
+  %tmp5 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp4
+  %tmp6 = load i32, i32 addrspace(1)* %tmp5
+  store i32 %tmp6, i32 addrspace(1)* %tmp1
+  ret void
+}
+
+declare void @llvm.amdgcn.s.barrier() #1
+declare i32 @llvm.r600.read.tidig.x() #2
+declare i32 @llvm.r600.read.local.size.x() #2
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }
+attributes #2 = { nounwind readnone }