diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp @@ -38,6 +38,7 @@ FunctionType::get(Type::getVoidTy(M.getContext()), false), GlobalValue::WeakODRLinkage, 0, InitOrFiniKernelName, &M); InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL); + InitOrFiniKernel->addFnAttr("amdgpu-flat-work-group-size", "1,1"); if (IsCtor) InitOrFiniKernel->addFnAttr("device-init"); else @@ -58,6 +59,7 @@ // void call_init_array_callbacks() { // for (auto start = __init_array_start; start != __init_array_end; ++start) // reinterpret_cast(*start)(); +// } static void createInitOrFiniCalls(Function &F, bool IsCtor) { Module &M = *F.getParent(); LLVMContext &C = M.getContext(); diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll @@ -64,5 +64,5 @@ ; CHECK: while.end: ; CHECK-NEXT: ret void -; CHECK: attributes #[[ATTR0:[0-9]+]] = { "device-init" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { "device-fini" } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" } diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll @@ -6,6 +6,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=VISIBILITY ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -S - 2>&1 | FileCheck %s -check-prefix=SECTION ; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-global-ctor-dtor=0 -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=DISABLED +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - 2>&1 | FileCheck %s -check-prefix=METADATA @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }] @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }] @@ -54,6 +55,15 @@ ; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd ; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini ; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd +; METADATA: amdhsa.kernels: +; METADATA: .kind: init +; METADATA: .max_flat_workgroup_size: 1 +; METADATA: .name: amdgcn.device.init +; METADATA: .symbol: amdgcn.device.init.kd +; METADATA: .kind: fini +; METADATA: .max_flat_workgroup_size: 1 +; METADATA: .name: amdgcn.device.fini +; METADATA: .symbol: amdgcn.device.fini.kd define internal void @foo() { ret void @@ -63,5 +73,5 @@ ret void } -; CHECK: attributes #0 = { "device-init" } -; CHECK: attributes #1 = { "device-fini" } +; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "device-init" } +; CHECK: attributes #1 = { "amdgpu-flat-work-group-size"="1,1" "device-fini" } diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll --- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll @@ -57,5 +57,5 @@ ret void } -; CHECK: attributes #0 = { "device-init" } -; CHECK: attributes #1 = { "device-fini" } +; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "device-init" } +; CHECK: attributes #1 = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }