diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0 @@ -9,43 +10,85 @@ @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 ; HSA: @store_cast_0_flat_to_group_addrspacecast() #1 +;. +; HSA: @[[LDS_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global i32 undef, align 4 +; HSA: @[[LDS_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 +; HSA: @[[GLOBAL_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global i32 undef, align 4 +; HSA: @[[GLOBAL_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 +;. define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 { +; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast +; HSA-SAME: () #[[ATTR1:[0-9]+]] { +; HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4 +; HSA-NEXT: ret void +; store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) ret void } ; HSA: @store_cast_0_group_to_flat_addrspacecast() #2 define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 { +; HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast +; HSA-SAME: () #[[ATTR2:[0-9]+]] { +; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4 +; HSA-NEXT: ret void +; store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*) ret void } ; HSA: define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #2 define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 { +; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat +; HSA-SAME: () #[[ATTR2]] { +; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4 +; HSA-NEXT: ret void +; store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*) ret void } ; HSA: @store_constant_cast_group_gv_gep_to_flat() #2 define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 { +; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat +; HSA-SAME: () #[[ATTR2]] { +; HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; HSA-NEXT: ret void +; store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) ret void } ; HSA: @store_constant_cast_global_gv_to_flat() #1 define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 { +; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat +; HSA-SAME: () #[[ATTR1]] { +; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4 +; 
HSA-NEXT: ret void +; store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*) ret void } ; HSA: @store_constant_cast_global_gv_gep_to_flat() #1 define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 { +; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat +; HSA-SAME: () #[[ATTR1]] { +; HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; HSA-NEXT: ret void +; store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) ret void } ; HSA: @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +; HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat +; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { +; HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; HSA-NEXT: ret void +; %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) store i32 %val, i32 addrspace(1)* %out ret void @@ -53,6 +96,12 @@ ; HSA: @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +; HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat +; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { +; HSA-NEXT: [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4 +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4 +; HSA-NEXT: ret void +; %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst store i32 %val, i32 addrspace(1)* %out ret void @@ -60,6 +109,13 @@ ; HSA: @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +; HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat +; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { +; HSA-NEXT: [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4 +; HSA-NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0 +; HSA-NEXT: store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4 +; HSA-NEXT: ret void +; %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, i32 addrspace(1)* %out 
@@ -68,6 +124,11 @@ ; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { +; HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat +; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { +; HSA-NEXT: call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) +; HSA-NEXT: ret void +; call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) ret void } @@ -75,6 +136,11 @@ ; Can't just search the pointer value ; HSA: @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #2 define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 { +; HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat +; HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { +; HSA-NEXT: store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8 +; HSA-NEXT: ret void +; store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out ret void } @@ -82,6 +148,11 @@ ; Can't just search pointer types ; HSA: @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #2 define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 { +; HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat +; HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] { +; HSA-NEXT: store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4 +; HSA-NEXT: ret void +; store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out ret void } @@ -89,12 +160,21 @@ ; Cast group to flat, do GEP, cast back to group ; HSA: @store_constant_cast_group_gv_gep_to_flat_to_group() #2 define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 { +; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group +; HSA-SAME: () #[[ATTR2]] { +; HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4 +; HSA-NEXT: ret void +; store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) ret void 
} ; HSA: @ret_constant_cast_group_gv_gep_to_flat_to_group() #2 define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { +; HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group +; HSA-SAME: () #[[ATTR2]] { +; HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) +; ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) } @@ -104,3 +184,8 @@ attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind } +;. +; HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn } +; HSA: attributes #[[ATTR1]] = { nounwind } +; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" } +;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 @@ -16,6 +17,12 @@ ; HSA: define void @use_workitem_id_x() #1 { define void @use_workitem_id_x() #1 { +; HSA-LABEL: define {{[^@]+}}@use_workitem_id_x +; HSA-SAME: () #[[ATTR1:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workitem.id.x() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -23,6 +30,12 @@ ; HSA: define void @use_workitem_id_y() #2 { define void @use_workitem_id_y() #1 { +; HSA-LABEL: define {{[^@]+}}@use_workitem_id_y +; HSA-SAME: () #[[ATTR2:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workitem.id.y() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -30,6 +43,12 @@ ; HSA: define void @use_workitem_id_z() #3 { define void @use_workitem_id_z() #1 { +; HSA-LABEL: define {{[^@]+}}@use_workitem_id_z +; HSA-SAME: () #[[ATTR3:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workitem.id.z() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -37,6 +56,12 @@ ; HSA: define void @use_workgroup_id_x() #4 { define void @use_workgroup_id_x() #1 { +; HSA-LABEL: define {{[^@]+}}@use_workgroup_id_x +; HSA-SAME: () #[[ATTR4:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() +; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workgroup.id.x() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -44,6 +69,12 @@ ; HSA: define void @use_workgroup_id_y() #5 { define void @use_workgroup_id_y() #1 { +; HSA-LABEL: define 
{{[^@]+}}@use_workgroup_id_y +; HSA-SAME: () #[[ATTR5:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workgroup.id.y() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -51,6 +82,12 @@ ; HSA: define void @use_workgroup_id_z() #6 { define void @use_workgroup_id_z() #1 { +; HSA-LABEL: define {{[^@]+}}@use_workgroup_id_z +; HSA-SAME: () #[[ATTR6:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() +; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workgroup.id.z() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -58,6 +95,12 @@ ; HSA: define void @use_dispatch_ptr() #7 { define void @use_dispatch_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr +; HSA-SAME: () #[[ATTR7:[0-9]+]] { +; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +; HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; HSA-NEXT: ret void +; %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef ret void @@ -65,6 +108,12 @@ ; HSA: define void @use_queue_ptr() #8 { define void @use_queue_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@use_queue_ptr +; HSA-SAME: () #[[ATTR8:[0-9]+]] { +; HSA-NEXT: [[QUEUE_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() +; HSA-NEXT: store volatile i8 addrspace(4)* [[QUEUE_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; HSA-NEXT: ret void +; %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() store volatile i8 addrspace(4)* %queue.ptr, i8 addrspace(4)* addrspace(1)* undef ret void @@ -72,6 +121,12 @@ ; HSA: define void @use_dispatch_id() #9 { define void @use_dispatch_id() #1 { +; HSA-LABEL: define {{[^@]+}}@use_dispatch_id +; HSA-SAME: () #[[ATTR9:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i64 @llvm.amdgcn.dispatch.id() +; HSA-NEXT: store volatile i64 [[VAL]], i64 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %val = call i64 @llvm.amdgcn.dispatch.id() store volatile i64 %val, i64 addrspace(1)* undef ret void @@ -79,6 +134,14 @@ ; HSA: define void @use_workgroup_id_y_workgroup_id_z() #10 { define void @use_workgroup_id_y_workgroup_id_z() #1 { +; HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z +; HSA-SAME: () #[[ATTR10:[0-9]+]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workgroup.id.y() %val1 = call i32 @llvm.amdgcn.workgroup.id.z() store volatile i32 %val0, i32 addrspace(1)* undef @@ -88,90 +151,167 @@ ; HSA: define void @func_indirect_use_workitem_id_x() #1 { define void @func_indirect_use_workitem_id_x() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x +; HSA-SAME: () #[[ATTR1]] { +; HSA-NEXT: call void @use_workitem_id_x() +; HSA-NEXT: ret void +; call void @use_workitem_id_x() ret void } ; HSA: define void @kernel_indirect_use_workitem_id_x() #1 { define void @kernel_indirect_use_workitem_id_x() #1 { +; HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x +; 
HSA-SAME: () #[[ATTR1]] { +; HSA-NEXT: call void @use_workitem_id_x() +; HSA-NEXT: ret void +; call void @use_workitem_id_x() ret void } ; HSA: define void @func_indirect_use_workitem_id_y() #2 { define void @func_indirect_use_workitem_id_y() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y +; HSA-SAME: () #[[ATTR2]] { +; HSA-NEXT: call void @use_workitem_id_y() +; HSA-NEXT: ret void +; call void @use_workitem_id_y() ret void } ; HSA: define void @func_indirect_use_workitem_id_z() #3 { define void @func_indirect_use_workitem_id_z() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z +; HSA-SAME: () #[[ATTR3]] { +; HSA-NEXT: call void @use_workitem_id_z() +; HSA-NEXT: ret void +; call void @use_workitem_id_z() ret void } ; HSA: define void @func_indirect_use_workgroup_id_x() #4 { define void @func_indirect_use_workgroup_id_x() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x +; HSA-SAME: () #[[ATTR4]] { +; HSA-NEXT: call void @use_workgroup_id_x() +; HSA-NEXT: ret void +; call void @use_workgroup_id_x() ret void } ; HSA: define void @kernel_indirect_use_workgroup_id_x() #4 { define void @kernel_indirect_use_workgroup_id_x() #1 { +; HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x +; HSA-SAME: () #[[ATTR4]] { +; HSA-NEXT: call void @use_workgroup_id_x() +; HSA-NEXT: ret void +; call void @use_workgroup_id_x() ret void } ; HSA: define void @func_indirect_use_workgroup_id_y() #5 { define void @func_indirect_use_workgroup_id_y() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y +; HSA-SAME: () #[[ATTR5]] { +; HSA-NEXT: call void @use_workgroup_id_y() +; HSA-NEXT: ret void +; call void @use_workgroup_id_y() ret void } ; HSA: define void @func_indirect_use_workgroup_id_z() #6 { define void @func_indirect_use_workgroup_id_z() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z +; HSA-SAME: () #[[ATTR6]] { +; HSA-NEXT: call void @use_workgroup_id_z() +; HSA-NEXT: ret void +; call void @use_workgroup_id_z() ret void } ; HSA: define void @func_indirect_indirect_use_workgroup_id_y() #5 { define void @func_indirect_indirect_use_workgroup_id_y() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y +; HSA-SAME: () #[[ATTR5]] { +; HSA-NEXT: call void @func_indirect_use_workgroup_id_y() +; HSA-NEXT: ret void +; call void @func_indirect_use_workgroup_id_y() ret void } ; HSA: define void @indirect_x2_use_workgroup_id_y() #5 { define void @indirect_x2_use_workgroup_id_y() #1 { +; HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y +; HSA-SAME: () #[[ATTR5]] { +; HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() +; HSA-NEXT: ret void +; call void @func_indirect_indirect_use_workgroup_id_y() ret void } ; HSA: define void @func_indirect_use_dispatch_ptr() #7 { define void @func_indirect_use_dispatch_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr +; HSA-SAME: () #[[ATTR7]] { +; HSA-NEXT: call void @use_dispatch_ptr() +; HSA-NEXT: ret void +; call void @use_dispatch_ptr() ret void } ; HSA: define void @func_indirect_use_queue_ptr() #8 { define void @func_indirect_use_queue_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr +; HSA-SAME: () #[[ATTR8]] { +; HSA-NEXT: call void @use_queue_ptr() +; HSA-NEXT: ret void +; call void @use_queue_ptr() ret void } ; HSA: define void @func_indirect_use_dispatch_id() #9 { define void @func_indirect_use_dispatch_id() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id +; 
HSA-SAME: () #[[ATTR9]] { +; HSA-NEXT: call void @use_dispatch_id() +; HSA-NEXT: ret void +; call void @use_dispatch_id() ret void } ; HSA: define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #11 { define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z +; HSA-SAME: () #[[ATTR11:[0-9]+]] { +; HSA-NEXT: call void @func_indirect_use_workgroup_id_y_workgroup_id_z() +; HSA-NEXT: ret void +; call void @func_indirect_use_workgroup_id_y_workgroup_id_z() ret void } ; HSA: define void @recursive_use_workitem_id_y() #2 { define void @recursive_use_workitem_id_y() #1 { +; HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y +; HSA-SAME: () #[[ATTR2]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: call void @recursive_use_workitem_id_y() +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workitem.id.y() store volatile i32 %val, i32 addrspace(1)* undef call void @recursive_use_workitem_id_y() @@ -180,12 +320,23 @@ ; HSA: define void @call_recursive_use_workitem_id_y() #2 { define void @call_recursive_use_workitem_id_y() #1 { +; HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y +; HSA-SAME: () #[[ATTR2]] { +; HSA-NEXT: call void @recursive_use_workitem_id_y() +; HSA-NEXT: ret void +; call void @recursive_use_workitem_id_y() ret void } ; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 { define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast +; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR8]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* +; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 +; HSA-NEXT: ret void +; %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* store volatile i32 0, i32 addrspace(4)* %stof ret void @@ -193,6 +344,12 @@ ; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 { define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { +; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9 +; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* +; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 +; HSA-NEXT: ret void +; %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* store volatile i32 0, i32 addrspace(4)* %stof ret void @@ -200,6 +357,13 @@ ; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 { define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 { +; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9 +; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* +; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 +; HSA-NEXT: call void @func_indirect_use_queue_ptr() +; HSA-NEXT: ret void +; %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* store volatile i32 0, i32 addrspace(4)* %stof call void @func_indirect_use_queue_ptr() @@ -208,24 +372,45 @@ ; HSA: define void @indirect_use_group_to_flat_addrspacecast() #8 { define void 
@indirect_use_group_to_flat_addrspacecast() #1 { +; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast +; HSA-SAME: () #[[ATTR8]] { +; HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) +; HSA-NEXT: ret void +; call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) ret void } ; HSA: define void @indirect_use_group_to_flat_addrspacecast_gfx9() #11 { define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 { +; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9 +; HSA-SAME: () #[[ATTR11]] { +; HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) +; HSA-NEXT: ret void +; call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) ret void } ; HSA: define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #8 { define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 { +; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9 +; HSA-SAME: () #[[ATTR8]] { +; HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) +; HSA-NEXT: ret void +; call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) ret void } ; HSA: define void @use_kernarg_segment_ptr() #14 { define void @use_kernarg_segment_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr +; HSA-SAME: () #[[ATTR14:[0-9]+]] { +; HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: store volatile i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; HSA-NEXT: ret void +; %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() store volatile i8 addrspace(4)* %kernarg.segment.ptr, i8 addrspace(4)* addrspace(1)* undef ret void @@ -233,12 +418,23 @@ ; HSA: define void @func_indirect_use_kernarg_segment_ptr() #11 { define void @func_indirect_use_kernarg_segment_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr +; HSA-SAME: () #[[ATTR11]] { +; HSA-NEXT: call void @use_kernarg_segment_ptr() +; HSA-NEXT: ret void +; call void @use_kernarg_segment_ptr() ret void } ; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 { define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr +; HSA-SAME: () #[[ATTR15:[0-9]+]] { +; HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +; HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; HSA-NEXT: ret void +; %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef ret void @@ -246,6 +442,12 @@ ; HSA: define void @use_implicitarg_ptr() #16 { define void @use_implicitarg_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr +; HSA-SAME: () #[[ATTR16:[0-9]+]] { +; HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +; HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; HSA-NEXT: ret void +; %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef ret void @@ -253,6 +455,11 @@ ; HSA: define void @func_indirect_use_implicitarg_ptr() #16 { define void 
@func_indirect_use_implicitarg_ptr() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr +; HSA-SAME: () #[[ATTR16]] { +; HSA-NEXT: call void @use_implicitarg_ptr() +; HSA-NEXT: ret void +; call void @use_implicitarg_ptr() ret void } @@ -262,41 +469,76 @@ ; HSA: define internal void @defined.func() #17 { define internal void @defined.func() #3 { +; HSA-LABEL: define {{[^@]+}}@defined.func +; HSA-SAME: () #[[ATTR17:[0-9]+]] { +; HSA-NEXT: ret void +; ret void } ; HSA: define void @func_call_external() #17 { define void @func_call_external() #3 { +; HSA-LABEL: define {{[^@]+}}@func_call_external +; HSA-SAME: () #[[ATTR17]] { +; HSA-NEXT: call void @external.func() +; HSA-NEXT: ret void +; call void @external.func() ret void } ; HSA: define void @func_call_defined() #17 { define void @func_call_defined() #3 { +; HSA-LABEL: define {{[^@]+}}@func_call_defined +; HSA-SAME: () #[[ATTR17]] { +; HSA-NEXT: call void @defined.func() +; HSA-NEXT: ret void +; call void @defined.func() ret void } ; HSA: define void @func_call_asm() #18 { define void @func_call_asm() #3 { +; HSA-LABEL: define {{[^@]+}}@func_call_asm +; HSA-SAME: () #[[ATTR18:[0-9]+]] { +; HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR18]] +; HSA-NEXT: ret void +; call void asm sideeffect "", ""() #3 ret void } ; HSA: define amdgpu_kernel void @kern_call_external() #19 { define amdgpu_kernel void @kern_call_external() #3 { +; HSA-LABEL: define {{[^@]+}}@kern_call_external +; HSA-SAME: () #[[ATTR19:[0-9]+]] { +; HSA-NEXT: call void @external.func() +; HSA-NEXT: ret void +; call void @external.func() ret void } ; HSA: define amdgpu_kernel void @func_kern_defined() #19 { define amdgpu_kernel void @func_kern_defined() #3 { +; HSA-LABEL: define {{[^@]+}}@func_kern_defined +; HSA-SAME: () #[[ATTR19]] { +; HSA-NEXT: call void @defined.func() +; HSA-NEXT: ret void +; call void @defined.func() ret void } ; HSA: define i32 @use_dispatch_ptr_ret_type() #20 { define i32 @use_dispatch_ptr_ret_type() #1 { +; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type +; HSA-SAME: () #[[ATTR20:[0-9]+]] { +; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +; HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; HSA-NEXT: ret i32 0 +; %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef ret i32 0 @@ -304,6 +546,12 @@ ; HSA: define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #20 { define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 { +; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func +; HSA-SAME: () #[[ATTR20]] { +; HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() +; HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; HSA-NEXT: ret float [[FADD]] +; %f = call float bitcast (i32()* @use_dispatch_ptr_ret_type to float()*)() %fadd = fadd float %f, 1.0 ret float %fadd @@ -335,3 +583,26 @@ ; HSA: attributes #18 = { nounwind } ; HSA: attributes #19 = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" } ; HSA: attributes #20 = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" } +;. 
+; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" } +; HSA: attributes #[[ATTR11]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR12]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" } +; HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR17]] = { nounwind "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR18]] = { nounwind } +; HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" } +; HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" } +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" @@ -19,6 +20,12 @@ ; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tgid_x +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workgroup.id.x() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -26,6 +33,12 @@ ; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 { define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tgid_y +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workgroup.id.y() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -33,6 +46,14 @@ ; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 { define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workgroup.id.y() store volatile i32 %val0, i32 addrspace(1)* %ptr %val1 = call i32 @llvm.amdgcn.workgroup.id.y() @@ -42,6 +63,14 @@ ; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 { define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workgroup.id.x() %val1 = call i32 @llvm.amdgcn.workgroup.id.y() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -51,6 +80,12 @@ ; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 { define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tgid_z +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() +; HSA-NEXT: store i32 
[[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workgroup.id.z() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -58,6 +93,14 @@ ; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 { define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tgid_x_z +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workgroup.id.x() %val1 = call i32 @llvm.amdgcn.workgroup.id.z() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -67,6 +110,14 @@ ; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 { define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tgid_y_z +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workgroup.id.y() %val1 = call i32 @llvm.amdgcn.workgroup.id.z() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -76,6 +127,16 @@ ; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 { define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workgroup.id.x() %val1 = call i32 @llvm.amdgcn.workgroup.id.y() %val2 = call i32 @llvm.amdgcn.workgroup.id.z() @@ -87,6 +148,12 @@ ; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tidig_x +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workitem.id.x() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -94,6 +161,12 @@ ; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 { define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tidig_y +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workitem.id.y() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -101,6 +174,12 @@ ; HSA: 
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 { define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tidig_z +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] { +; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val = call i32 @llvm.amdgcn.workitem.id.z() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -108,6 +187,14 @@ ; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workitem.id.x() %val1 = call i32 @llvm.amdgcn.workgroup.id.x() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -117,6 +204,14 @@ ; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 { define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workitem.id.y() %val1 = call i32 @llvm.amdgcn.workgroup.id.y() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -126,6 +221,16 @@ ; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 { define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; HSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workitem.id.x() %val1 = call i32 @llvm.amdgcn.workitem.id.y() %val2 = call i32 @llvm.amdgcn.workitem.id.z() @@ -137,6 +242,22 @@ ; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 { define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_all_workitems +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] { +; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; HSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; HSA-NEXT: [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() +; HSA-NEXT: [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; HSA-NEXT: [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() +; 
HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %val0 = call i32 @llvm.amdgcn.workitem.id.x() %val1 = call i32 @llvm.amdgcn.workitem.id.y() %val2 = call i32 @llvm.amdgcn.workitem.id.z() @@ -154,6 +275,14 @@ ; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 { define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR10:[0-9]+]] { +; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +; HSA-NEXT: [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)* +; HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4 +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %bc @@ -163,6 +292,14 @@ ; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 { define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_queue_ptr +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR11:[0-9]+]] { +; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() +; HSA-NEXT: [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)* +; HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4 +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %bc @@ -172,6 +309,14 @@ ; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 { define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { +; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)* +; HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4 +; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; HSA-NEXT: ret void +; %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %bc @@ -181,6 +326,12 @@ ; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 { define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast +; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32* +; HSA-NEXT: store volatile i32 0, i32* [[STOF]], align 4 +; HSA-NEXT: ret void +; %stof = addrspacecast i32 
addrspace(3)* %ptr to i32* store volatile i32 0, i32* %stof ret void @@ -188,6 +339,12 @@ ; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 { define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast +; HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32* +; HSA-NEXT: store volatile i32 0, i32* [[STOF]], align 4 +; HSA-NEXT: ret void +; %stof = addrspacecast i32 addrspace(5)* %ptr to i32* store volatile i32 0, i32* %stof ret void @@ -195,6 +352,12 @@ ; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 { define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast +; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(3)* +; HSA-NEXT: store volatile i32 0, i32 addrspace(3)* [[FTOS]], align 4 +; HSA-NEXT: ret void +; %ftos = addrspacecast i32* %ptr to i32 addrspace(3)* store volatile i32 0, i32 addrspace(3)* %ftos ret void @@ -202,6 +365,12 @@ ; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 { define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast +; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(5)* +; HSA-NEXT: store volatile i32 0, i32 addrspace(5)* [[FTOS]], align 4 +; HSA-NEXT: ret void +; %ftos = addrspacecast i32* %ptr to i32 addrspace(5)* store volatile i32 0, i32 addrspace(5)* %ftos ret void @@ -210,6 +379,12 @@ ; No-op addrspacecast should not use queue ptr ; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast +; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(1)* [[PTR]] to i32* +; HSA-NEXT: store volatile i32 0, i32* [[STOF]], align 4 +; HSA-NEXT: ret void +; %stof = addrspacecast i32 addrspace(1)* %ptr to i32* store volatile i32 0, i32* %stof ret void @@ -217,6 +392,12 @@ ; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 { define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast +; HSA-SAME: (i32 addrspace(4)* [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(4)* [[PTR]] to i32* +; HSA-NEXT: [[LD:%.*]] = load volatile i32, i32* [[STOF]], align 4 +; HSA-NEXT: ret void +; %stof = addrspacecast i32 addrspace(4)* %ptr to i32* %ld = load volatile i32, i32* %stof ret void @@ -224,6 +405,12 @@ ; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 { define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast +; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(1)* +; HSA-NEXT: store volatile i32 0, i32 addrspace(1)* [[FTOS]], align 4 +; HSA-NEXT: ret void +; %ftos = addrspacecast i32* %ptr to i32 addrspace(1)* store volatile 
i32 0, i32 addrspace(1)* %ftos ret void @@ -231,6 +418,12 @@ ; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 { define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast +; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(4)* +; HSA-NEXT: [[LD:%.*]] = load volatile i32, i32 addrspace(4)* [[FTOS]], align 4 +; HSA-NEXT: ret void +; %ftos = addrspacecast i32* %ptr to i32 addrspace(4)* %ld = load volatile i32, i32 addrspace(4)* %ftos ret void @@ -238,6 +431,13 @@ ; HSA: define amdgpu_kernel void @use_is_shared(i8* %ptr) #11 { define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_is_shared +; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] { +; HSA-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]]) +; HSA-NEXT: [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32 +; HSA-NEXT: store i32 [[EXT]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr) %ext = zext i1 %is.shared to i32 store i32 %ext, i32 addrspace(1)* undef @@ -246,6 +446,13 @@ ; HSA: define amdgpu_kernel void @use_is_private(i8* %ptr) #11 { define amdgpu_kernel void @use_is_private(i8* %ptr) #1 { +; HSA-LABEL: define {{[^@]+}}@use_is_private +; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] { +; HSA-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]]) +; HSA-NEXT: [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32 +; HSA-NEXT: store i32 [[EXT]], i32 addrspace(1)* undef, align 4 +; HSA-NEXT: ret void +; %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr) %ext = zext i1 %is.private to i32 store i32 %ext, i32 addrspace(1)* undef @@ -254,6 +461,12 @@ ; HSA: define amdgpu_kernel void @use_alloca() #13 { define amdgpu_kernel void @use_alloca() #1 { +; HSA-LABEL: define {{[^@]+}}@use_alloca +; HSA-SAME: () #[[ATTR13:[0-9]+]] { +; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 +; HSA-NEXT: ret void +; %alloca = alloca i32, addrspace(5) store i32 0, i32 addrspace(5)* %alloca ret void @@ -261,6 +474,15 @@ ; HSA: define amdgpu_kernel void @use_alloca_non_entry_block() #13 { define amdgpu_kernel void @use_alloca_non_entry_block() #1 { +; HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block +; HSA-SAME: () #[[ATTR13]] { +; HSA-NEXT: entry: +; HSA-NEXT: br label [[BB:%.*]] +; HSA: bb: +; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 +; HSA-NEXT: ret void +; entry: br label %bb @@ -272,6 +494,12 @@ ; HSA: define void @use_alloca_func() #13 { define void @use_alloca_func() #1 { +; HSA-LABEL: define {{[^@]+}}@use_alloca_func +; HSA-SAME: () #[[ATTR13]] { +; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 +; HSA-NEXT: ret void +; %alloca = alloca i32, addrspace(5) store i32 0, i32 addrspace(5)* %alloca ret void @@ -294,3 +522,19 @@ ; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" } ; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" } ; HSA: attributes #13 = { nounwind "amdgpu-stack-objects" } +;. 
+; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; HSA: attributes #[[ATTR1]] = { nounwind } +; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" } +; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" } +; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } +; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" } +; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" } +; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } +; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" } +; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" } +; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" } +; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" } +;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s declare i32 @llvm.r600.read.tgid.x() #0 @@ -14,6 +15,12 @@ ; ALL: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tgid_x +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.x() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.tgid.x() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -21,6 +28,12 @@ ; ALL: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 { define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tgid_y +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.y() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.tgid.y() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -28,6 +41,14 @@ ; ALL: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 { define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@multi_use_tgid_y +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y() +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tgid.y() store volatile i32 %val0, i32 addrspace(1)* %ptr %val1 = call i32 @llvm.r600.read.tgid.y() @@ -37,6 +58,14 @@ ; ALL: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 { define amdgpu_kernel void 
@use_tgid_x_y(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tgid_x_y +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x() +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tgid.x() %val1 = call i32 @llvm.r600.read.tgid.y() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -46,6 +75,12 @@ ; ALL: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 { define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tgid_z +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.z() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.tgid.z() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -53,6 +88,14 @@ ; ALL: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 { define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tgid_x_z +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x() +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tgid.x() %val1 = call i32 @llvm.r600.read.tgid.z() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -62,6 +105,14 @@ ; ALL: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 { define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tgid_y_z +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y() +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tgid.y() %val1 = call i32 @llvm.r600.read.tgid.z() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -71,6 +122,16 @@ ; ALL: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 { define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x() +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y() +; NOHSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tgid.z() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tgid.x() %val1 = call i32 @llvm.r600.read.tgid.y() %val2 = call i32 @llvm.r600.read.tgid.z() @@ -82,6 +143,12 @@ ; ALL: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_tidig_x(i32 
addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tidig_x +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.x() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.tidig.x() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -89,6 +156,12 @@ ; ALL: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 { define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tidig_y +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.y() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.tidig.y() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -96,6 +169,12 @@ ; ALL: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 { define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tidig_z +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.z() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.tidig.z() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -103,6 +182,14 @@ ; ALL: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x() +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.x() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tidig.x() %val1 = call i32 @llvm.r600.read.tgid.x() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -112,6 +199,14 @@ ; ALL: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 { define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.y() +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tidig.y() %val1 = call i32 @llvm.r600.read.tgid.y() store volatile i32 %val0, i32 addrspace(1)* %ptr @@ -121,6 +216,16 @@ ; ALL: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 { define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x() +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y() +; NOHSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 
+; NOHSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tidig.x() %val1 = call i32 @llvm.r600.read.tidig.y() %val2 = call i32 @llvm.r600.read.tidig.z() @@ -132,6 +237,22 @@ ; ALL: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 { define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_all_workitems +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] { +; NOHSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x() +; NOHSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y() +; NOHSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z() +; NOHSA-NEXT: [[VAL3:%.*]] = call i32 @llvm.r600.read.tgid.x() +; NOHSA-NEXT: [[VAL4:%.*]] = call i32 @llvm.r600.read.tgid.y() +; NOHSA-NEXT: [[VAL5:%.*]] = call i32 @llvm.r600.read.tgid.z() +; NOHSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val0 = call i32 @llvm.r600.read.tidig.x() %val1 = call i32 @llvm.r600.read.tidig.y() %val2 = call i32 @llvm.r600.read.tidig.z() @@ -150,6 +271,12 @@ ; HSA: define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #10 { ; NOHSA: define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_get_local_size_x +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.x() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.local.size.x() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -158,6 +285,12 @@ ; HSA: define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #10 { ; NOHSA: define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_get_local_size_y +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.y() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.local.size.y() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -166,6 +299,12 @@ ; HSA: define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #10 { ; NOHSA: define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 { +; NOHSA-LABEL: define {{[^@]+}}@use_get_local_size_z +; NOHSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { +; NOHSA-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.z() +; NOHSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 +; NOHSA-NEXT: ret void +; %val = call i32 @llvm.r600.read.local.size.z() store i32 %val, i32 addrspace(1)* %ptr ret void @@ -185,3 +324,15 @@ ; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" 
"amdgpu-work-item-id-z" } ; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" } +;. +; NOHSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; NOHSA: attributes #[[ATTR1]] = { nounwind } +; NOHSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" } +; NOHSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" } +; NOHSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } +; NOHSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" } +; NOHSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" } +; NOHSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } +; NOHSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; NOHSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +;. diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -1,21 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: define internal void @indirect() #0 { define internal void @indirect() { - ret void +; GCN-LABEL: define {{[^@]+}}@indirect +; GCN-SAME: () #[[ATTR0:[0-9]+]] { +; GCN-NEXT: ret void +; + ret void } ; GCN-LABEL: define internal void @direct() #1 { define internal void @direct() { - %fptr = alloca void()* - store void()* @indirect, void()** %fptr - %fp = load void()*, void()** %fptr - call void %fp() - ret void +; GCN-LABEL: define {{[^@]+}}@direct +; GCN-SAME: () #[[ATTR1:[0-9]+]] { +; GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 +; GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 +; GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 +; GCN-NEXT: call void [[FP]]() +; GCN-NEXT: ret void +; + %fptr = alloca void()* + store void()* @indirect, void()** %fptr + %fp = load void()*, void()** %fptr + call void %fp() + ret void } ; GCN-LABEL: define amdgpu_kernel void @test_direct_indirect_call() #2 { define amdgpu_kernel void @test_direct_indirect_call() { +; GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call +; GCN-SAME: () #[[ATTR2:[0-9]+]] { +; GCN-NEXT: call void @direct() +; GCN-NEXT: ret void +; call void @direct() ret void } @@ -23,3 +41,8 @@ ; attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ; attributes #1 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; attributes #2 = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" 
"uniform-work-group-size"="false" } +;. +; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +;. diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -1,17 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: define internal void @indirect() #0 { define internal void @indirect() { - ret void +; GCN-LABEL: define {{[^@]+}}@indirect +; GCN-SAME: () #[[ATTR0:[0-9]+]] { +; GCN-NEXT: ret void +; + ret void } ; GCN-LABEL: define amdgpu_kernel void @test_simple_indirect_call() #1 { define amdgpu_kernel void @test_simple_indirect_call() #0 { - %fptr = alloca void()* - store void()* @indirect, void()** %fptr - %fp = load void()*, void()** %fptr - call void %fp() - ret void +; GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call +; GCN-SAME: () #[[ATTR1:[0-9]+]] { +; GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 +; GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 +; GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 +; GCN-NEXT: call void [[FP]]() +; GCN-NEXT: ret void +; + %fptr = alloca void()* + store void()* @indirect, void()** %fptr + %fp = load void()*, void()** %fptr + call void %fp() + ret void } attributes #0 = { "amdgpu-dispatch-id" } @@ -20,3 +33,7 @@ attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } attributes #1 = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +;. 
+; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +;. diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s @@ -7,7 +8,11 @@ ; GCN-LABEL: define internal void @indirect() #0 { ; GFX9-LABEL: {{^}}indirect: define internal void @indirect() { - ret void +; GCN-LABEL: define {{[^@]+}}@indirect +; GCN-SAME: () #[[ATTR0:[0-9]+]] { +; GCN-NEXT: ret void +; + ret void } ; GCN-LABEL: define amdgpu_kernel void @test_simple_indirect_call() #1 { @@ -39,13 +44,26 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX9-NEXT: s_endpgm define amdgpu_kernel void @test_simple_indirect_call() { - %fptr = alloca void()*, addrspace(5) - %fptr.cast = addrspacecast void()* addrspace(5)* %fptr to void()** - store void()* @indirect, void()** %fptr.cast - %fp = load void()*, void()** %fptr.cast - call void %fp() - ret void +; GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call +; GCN-SAME: () #[[ATTR1:[0-9]+]] { +; GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5) +; GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast void ()* addrspace(5)* [[FPTR]] to void ()** +; GCN-NEXT: store void ()* @indirect, void ()** [[FPTR_CAST]], align 8 +; GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR_CAST]], align 8 +; GCN-NEXT: call void [[FP]]() +; GCN-NEXT: ret void +; + %fptr = alloca void()*, addrspace(5) + %fptr.cast = addrspacecast void()* addrspace(5)* %fptr to void()** + store void()* @indirect, void()** %fptr.cast + %fp = load void()*, void()** %fptr.cast + call void %fp() + ret void } ; attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ; attributes #1 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +;. 
+; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -1,19 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false
-; CHECK: define void @foo() #[[FOO:[0-9]+]] {
define void @foo() #0 {
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
+; CHECK-LABEL: define {{[^@]+}}@kernel1
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: ret void
+;
call void @foo()
ret void
}
attributes #0 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -1,25 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Test to verify if the attribute gets propagated across nested function calls
-; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
define void @func1() #0 {
+; CHECK-LABEL: define {{[^@]+}}@func1
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define void @func2() #[[FUNC]] {
define void @func2() #1 {
+; CHECK-LABEL: define {{[^@]+}}@func2
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: call void @func1()
+; CHECK-NEXT: ret void
+;
call void @func1()
ret void
}
-; CHECK: define amdgpu_kernel void @kernel3() #[[KERNEL:[0-9]+]] {
define amdgpu_kernel void @kernel3() #2 {
+; CHECK-LABEL: define {{[^@]+}}@kernel3
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @func2()
+; CHECK-NEXT: ret void
+;
call void @func2()
ret void
}
attributes #2 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -1,26 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Two kernels with different values of the uniform-work-group-attribute call the same function
-; CHECK: define void @func() #[[FUNC:[0-9]+]] {
define void @func() #0 {
+; CHECK-LABEL: define {{[^@]+}}@func
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
+; CHECK-LABEL: define {{[^@]+}}@kernel1
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @func()
+; CHECK-NEXT: ret void
+;
call void @func()
ret void
}
-; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
define amdgpu_kernel void @kernel2() #2 {
+; CHECK-LABEL: define {{[^@]+}}@kernel2
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @func()
+; CHECK-NEXT: ret void
+;
call void @func()
ret void
}
attributes #1 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[KERNEL2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -1,25 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it
-; CHECK: define void @func() #[[FUNC:[0-9]+]] {
define void @func() #0 {
+; CHECK-LABEL: define {{[^@]+}}@func
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
+; CHECK-LABEL: define {{[^@]+}}@kernel1
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @func()
+; CHECK-NEXT: ret void
+;
call void @func()
ret void
}
; External declaration of a function
-; CHECK: define weak_odr void @weak_func() #[[FUNC]] {
define weak_odr void @weak_func() #0 {
+; CHECK-LABEL: define {{[^@]+}}@weak_func
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
define amdgpu_kernel void @kernel2() #2 {
+; CHECK-LABEL: define {{[^@]+}}@kernel2
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @weak_func()
+; CHECK-NEXT: ret void
+;
call void @weak_func()
ret void
}
@@ -28,6 +43,8 @@
attributes #1 = { "uniform-work-group-size"="false" }
attributes #2 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -1,10 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Test to ensure recursive functions exhibit proper behaviour
; Test to generate fibonacci numbers
-; CHECK: define i32 @fib(i32 %n) #[[FIB:[0-9]+]] {
define i32 @fib(i32 %n) #0 {
+; CHECK-LABEL: define {{[^@]+}}@fib
+; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
+; CHECK: cont1:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
+; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
+; CHECK: cont2:
+; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]])
+; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
+; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]])
+; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
+; CHECK-NEXT: ret i32 [[RETVAL]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 1
+;
%cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %exit, label %cont1
@@ -25,8 +42,13 @@
ret i32 1
}
-; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[KERNEL:[0-9]+]] {
define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
+; CHECK-LABEL: define {{[^@]+}}@kernel
+; CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
+; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
+; CHECK-NEXT: ret void
+;
%r = call i32 @fib(i32 5)
store i32 %r, i32 addrspace(1)* %m
ret void
@@ -34,5 +56,7 @@
attributes #1 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -1,30 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck %s
-; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
define void @func1() {
+; CHECK-LABEL: define {{[^@]+}}@func1
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define void @func4() #[[FUNC]] {
define void @func4() {
+; CHECK-LABEL: define {{[^@]+}}@func4
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define void @func2() #[[FUNC]] {
define void @func2() #0 {
+; CHECK-LABEL: define {{[^@]+}}@func2
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: call void @func4()
+; CHECK-NEXT: call void @func1()
+; CHECK-NEXT: ret void
+;
call void @func4()
call void @func1()
ret void
}
-; CHECK: define void @func3() #[[FUNC]] {
define void @func3() {
+; CHECK-LABEL: define {{[^@]+}}@func3
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: call void @func1()
+; CHECK-NEXT: ret void
+;
call void @func1()
ret void
}
-; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] {
define amdgpu_kernel void @kernel3() #0 {
+; CHECK-LABEL: define {{[^@]+}}@kernel3
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @func2()
+; CHECK-NEXT: call void @func3()
+; CHECK-NEXT: ret void
+;
call void @func2()
call void @func3()
ret void
@@ -32,4 +53,7 @@
attributes #0 = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[FUNC]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.