Diff Detail
Event Timeline
Hi,
Is this expected that a bunch of tests with Mesa now crash because amdgcn.s.waitcnt is not found?
$ ./deqp-vk --deqp-case=dEQP-VK.compute.basic.shared_atomic_op_multiple_groups
Writing test log into TestResults.qpa
dEQP Core git-38d72fc212ba3084b4f4ddca8cd48ba77754c219 (0x38d72fc2) starting..
target implementation = 'Default'
Test case 'dEQP-VK.compute.basic.shared_atomic_op_multiple_groups'..
LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.s.waitcnt
Thanks Matt!
; ModuleID = 'mesa-shader' source_filename = "mesa-shader" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" target triple = "amdgcn-mesa-mesa3d" @0 = external addrspace(3) global i32 define amdgpu_cs void @main(i8 addrspace(6)* inreg noalias dereferenceable(18446744073709551615), <3 x i32> inreg, i32 inreg, i32 inreg, i32 inreg, <3 x i32>) #0 { main_body: %6 = extractelement <3 x i32> %1, i32 0 %7 = extractelement <3 x i32> %1, i32 1 %8 = mul i32 %6, %7 %9 = mul i32 %8, %4 %10 = mul i32 %6, %3 %11 = add i32 %9, %10 %12 = add i32 %11, %2 %13 = mul i32 %12, 12 store i32 0, i32 addrspace(3)* @0, align 4 call void @llvm.amdgcn.s.waitcnt(i32 49279) #2 call void @llvm.amdgcn.s.barrier() #3 fence release %14 = atomicrmw add i32 addrspace(3)* @0, i32 1 syncscope("workgroup-one-as") seq_cst %15 = add i32 %13, %14 %16 = add i32 %14, 1 %17 = bitcast i8 addrspace(6)* %0 to <4 x i32> addrspace(6)*, !amdgpu.uniform !0 %18 = shl i32 %15, 2 %19 = load <4 x i32>, <4 x i32> addrspace(6)* %17, align 16, !invariant.load !0 %20 = bitcast i32 %16 to float call void @llvm.amdgcn.raw.buffer.store.f32(float %20, <4 x i32> %19, i32 %18, i32 0, i32 1) #5 ret void } ; Function Attrs: nounwind readnone speculatable declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #1 ; Function Attrs: nounwind declare void @llvm.amdgcn.s.waitcnt(i32 immarg) #2 ; Function Attrs: convergent nounwind declare void @llvm.amdgcn.s.barrier() #3 ; Function Attrs: nounwind writeonly declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #4 attributes #0 = { "amdgpu-32bit-address-high-bits"="0xffff8000" "amdgpu-flat-work-group-size"="12,12" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind } attributes #3 = { convergent nounwind } attributes #4 = { nounwind writeonly } attributes #5 = { inaccessiblememonly nounwind } !0 = !{}
call void @llvm.amdgcn.s.waitcnt(i32 49279) #2
This isn't a 16-bit value, so this isn't a correct use. Previously it would have been truncated. I'm also surprised there is a user of this that doesn't just use 0? In context it looks like you should be using a fence instruction instead?
Well, it is, just a uint16. Should probably change this to checking if it' unsigned 16