diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp @@ -31,6 +31,8 @@ StringRef InitOrFiniKernelName = "amdgcn.device.init"; if (!IsCtor) InitOrFiniKernelName = "amdgcn.device.fini"; + if (Function *F = M.getFunction(InitOrFiniKernelName)) + return F; Function *InitOrFiniKernel = Function::createWithDefaultAttr( FunctionType::get(Type::getVoidTy(M.getContext()), false), @@ -63,12 +65,18 @@ for (Value *V : GA->operands()) { auto *CS = cast(V); - IRB.CreateCall(ConstructorTy, CS->getOperand(1)); + bool AlreadyRegistered = + llvm::any_of(CS->getOperand(1)->uses(), [=](Use &U) { + if (auto *CB = dyn_cast(U.getUser())) + if (CB->getCaller() == InitOrFiniKernel) + return true; + return false; + }); + if (!AlreadyRegistered) + IRB.CreateCall(ConstructorTy, CS->getOperand(1)); } appendToUsed(M, {InitOrFiniKernel}); - - GV->eraseFromParent(); return true; } @@ -83,9 +91,7 @@ public: static char ID; AMDGPUCtorDtorLoweringLegacy() : ModulePass(ID) {} - bool runOnModule(Module &M) override { - return lowerCtorsAndDtors(M); - } + bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); } }; } // End anonymous namespace diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll @@ -18,8 +18,6 @@ @foo.alias = hidden alias void (), ptr @foo ;. -; CHECK-NOT: @llvm.global_ctors -; CHECK-NOT: @llvm.global_dtors ; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata" ; CHECK: @foo.alias = hidden alias void (), ptr @foo ;. diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll @@ -7,25 +7,17 @@ @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }] @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }] -; CHECK-NOT: @llvm.global_ctors -; CHECK-NOT: @llvm.global_dtors - -; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0 -; CHECK-NEXT: store -; CHECK-NEXT: ret void - -; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1 -; CHECK-NEXT: store -; CHECK-NEXT: ret void - - -; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init.1() #0 -; CHECK-NEXT: call void @foo -; CHECK-NEXT: ret void - -; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini.2() #1 -; CHECK-NEXT: call void @bar -; CHECK-NEXT: ret void +; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0 { +; CHECK-NEXT: store volatile i32 1, ptr addrspace(1) null +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK-LABEL: define amdgpu_kernel void @amdgcn.device.fini() #1 { +; CHECK-NEXT: store volatile i32 0, ptr addrspace(1) null +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: ret void +; CHECK-NEXT: } ; CHECK-NOT: amdgcn.device. @@ -34,12 +26,6 @@ ; CHECK-VIS: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini{{$}} ; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd{{$}} -; CHECK-VIS: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.init.1{{$}} -; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.1.kd{{$}} -; CHECK-VIS: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini.2{{$}} -; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.2.kd{{$}} - - define internal void @foo() { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll @@ -4,12 +4,12 @@ ; Make sure we get the same result if we run multiple times ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-ctor-dtor,amdgpu-lower-ctor-dtor < %s | FileCheck %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=VISIBILITY +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -S - 2>&1 | FileCheck %s -check-prefix=SECTION @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }] @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }] -; CHECK-NOT: @llvm.global_ctors -; CHECK-NOT: @llvm.global_dtors +; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini] ; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0 ; CHECK-NEXT: call void @foo @@ -25,6 +25,8 @@ ; VISIBILITY: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd ; VISIBILITY: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini ; VISIBILITY: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd +; SECTION: .init_array.1 INIT_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8 +; SECTION: .fini_array.1 FINI_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8 define internal void @foo() { ret void