diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -27,20 +27,24 @@
 
 namespace {
 
+enum class AddressSpace : unsigned {
+  Generic = 0,
+  Global = 1,
+  Shared = 3,
+  Constant = 4,
+  Private = 5,
+};
+
 static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
   StringRef InitOrFiniKernelName = "amdgcn.device.init";
   if (!IsCtor)
     InitOrFiniKernelName = "amdgcn.device.fini";
-  if (Function *F = M.getFunction(InitOrFiniKernelName))
-    return F;
+  if (M.getFunction(InitOrFiniKernelName))
+    return nullptr;
 
   Function *InitOrFiniKernel = Function::createWithDefaultAttr(
       FunctionType::get(Type::getVoidTy(M.getContext()), false),
-      GlobalValue::ExternalLinkage, 0, InitOrFiniKernelName, &M);
-  BasicBlock *InitOrFiniKernelBB =
-      BasicBlock::Create(M.getContext(), "", InitOrFiniKernel);
-  ReturnInst::Create(M.getContext(), InitOrFiniKernelBB);
-
+      GlobalValue::WeakODRLinkage, 0, InitOrFiniKernelName, &M);
   InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);
   if (IsCtor)
     InitOrFiniKernel->addFnAttr("device-init");
@@ -49,6 +53,70 @@
   return InitOrFiniKernel;
 }
 
+// The linker will provide the associated symbols to allow us to traverse the
+// global constructors in priority order. We create the IR required to call
+// each callback in this section. This is equivalent to the following code.
+//
+// extern "C" void * __init_array_start[];
+// extern "C" void * __init_array_end[];
+//
+// using InitCallback = void();
+//
+// void call_init_array_callbacks() {
+//   for (auto start = __init_array_start; start != __init_array_end; ++start)
+//     reinterpret_cast<InitCallback *>(*start)();
+// }
+static void createInitOrFiniCalls(Function &F, bool IsCtor) {
+  Module &M = *F.getParent();
+  LLVMContext &C = M.getContext();
+
+  IRBuilder<> IRB(BasicBlock::Create(C, "entry", &F));
+  auto *LoopBB = BasicBlock::Create(C, "while.entry", &F);
+  auto *ExitBB = BasicBlock::Create(C, "while.end", &F);
+  Type *PtrTy = IRB.getPtrTy(static_cast<unsigned>(AddressSpace::Global));
+
+  auto *Begin = M.getOrInsertGlobal(
+      IsCtor ? "__init_array_start" : "__fini_array_start",
+      ArrayType::get(PtrTy, 0), [&]() {
+        return new GlobalVariable(
+            M, ArrayType::get(PtrTy, 0),
+            /*isConstant=*/true, GlobalValue::ExternalLinkage,
+            /*Initializer=*/nullptr,
+            IsCtor ? "__init_array_start" : "__fini_array_start",
+            /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,
+            /*AddressSpace=*/1);
+      });
+  auto *End = M.getOrInsertGlobal(
+      IsCtor ? "__init_array_end" : "__fini_array_end",
+      ArrayType::get(PtrTy, 0), [&]() {
+        return new GlobalVariable(
+            M, ArrayType::get(PtrTy, 0),
+            /*isConstant=*/true, GlobalValue::ExternalLinkage,
+            /*Initializer=*/nullptr,
+            IsCtor ? "__init_array_end" : "__fini_array_end",
+            /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,
+            /*AddressSpace=*/1);
+      });
+
+  // The constructor type is supposed to allow using the argument vectors, but
+  // for now we just call them with no arguments.
+  auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});
+
+  IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB);
+  IRB.SetInsertPoint(LoopBB);
+  auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
+  auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()),
+                                  CallBackPHI, "callback");
+  IRB.CreateCall(CallBackTy, CallBack);
+  auto *NewCallBack = IRB.CreateInBoundsGEP(
+      PtrTy, CallBackPHI, ConstantInt::get(IRB.getInt64Ty(), 1), "next");
+  auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end");
+  CallBackPHI->addIncoming(Begin, &F.getEntryBlock());
+  CallBackPHI->addIncoming(NewCallBack, LoopBB);
+  IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
+  IRB.SetInsertPoint(ExitBB);
+  IRB.CreateRetVoid();
+}
+
 static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,
                                    bool IsCtor) {
   GlobalVariable *GV = M.getGlobalVariable(GlobalName);
@@ -59,22 +127,10 @@
     return false;
 
   Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
-  IRBuilder<> IRB(InitOrFiniKernel->getEntryBlock().getTerminator());
-
-  FunctionType *ConstructorTy = InitOrFiniKernel->getFunctionType();
-
-  for (Value *V : GA->operands()) {
-    auto *CS = cast<ConstantStruct>(V);
-    bool AlreadyRegistered =
-        llvm::any_of(CS->getOperand(1)->uses(), [=](Use &U) {
-          if (auto *CB = dyn_cast<CallBase>(U.getUser()))
-            if (CB->getCaller() == InitOrFiniKernel)
-              return true;
-          return false;
-        });
-    if (!AlreadyRegistered)
-      IRB.CreateCall(ConstructorTy, CS->getOperand(1));
-  }
+  if (!InitOrFiniKernel)
+    return false;
+
+  createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);
 
   appendToUsed(M, {InitOrFiniKernel});
   return true;
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -1,5 +1,4 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-lower-ctor-dtor %s | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
 
 ; Make sure we emit code for constructor entries that aren't direct
 ; function calls.
@@ -18,6 +17,10 @@
 @foo.alias = hidden alias void (), ptr @foo
 
 ;.
+; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
 ; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
 ; CHECK: @foo.alias = hidden alias void (), ptr @foo
 ;.
@@ -35,46 +38,31 @@
   ret void
 }
 
-; CHECK: define amdgpu_kernel void @amdgcn.device.init() #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: call void @foo.alias()
-; CHECK-NEXT: call void inttoptr (i64 4096 to ptr)()
-; CHECK-NEXT: ret void
-; CHECK-NEXT: }
-
-; CHECK: define amdgpu_kernel void @amdgcn.device.fini() #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: call void addrspacecast (ptr addrspace(1) @bar to ptr)()
-; CHECK-NEXT: ret void
-; CHECK-NEXT: }
-
-;.
-; CHECK: attributes #[[ATTR0]] = { "device-init" }
-; CHECK: attributes #[[ATTR1]] = { "device-fini" }
-
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK: while.entry:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: call void [[CALLBACK]]()
+; CHECK-NEXT: [[NEXT]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
+; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK: while.end:
+; CHECK-NEXT: ret void
 
-; GCN-LABEL: foo:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-LABEL: bar:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-LABEL: amdgcn.device.init:
-; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], foo.alias@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], foo.alias@rel32@hi+12
-; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK: while.entry:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: call void [[CALLBACK]]()
+; CHECK-NEXT: [[NEXT]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK: while.end:
+; CHECK-NEXT: ret void
 
-; GCN: s_mov_b64 [[LIT_ADDR:s\[[0-9]+:[0-9]+\]]], 0x1000
-; GCN: s_swappc_b64 s[30:31], [[LIT_ADDR]]
-; GCN-NEXT: s_endpgm
-;
-; GCN-LABEL: amdgcn.device.fini:
-; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], bar@gotpcrel32@lo+4
-; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], bar@gotpcrel32@hi+12
-; GCN-NEXT: s_load_dwordx2 s{{\[}}[[GOT_LO:[0-9]+]]:[[GOT_HI:[0-9]+]]{{\]}}, s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}, 0x0
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[GOT_LO]]:[[GOT_HI]]{{\]}}
-; GCN-NEXT: s_endpgm
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { "device-init" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { "device-fini" }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
@@ -1,21 +1,19 @@
 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-ctor-dtor < %s | FileCheck %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS
 
-; Make sure there's no crash or error if amdgcn.device.init or
-; amdgcn.device.fini already exist.
+; Make sure that we don't modify the functions if amdgcn.device.init or
+; amdgcn.device.fini already exist.
 
 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
 
 ; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0 {
 ; CHECK-NEXT: store volatile i32 1, ptr addrspace(1) null
-; CHECK-NEXT: call void @foo()
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
 
 ; CHECK-LABEL: define amdgpu_kernel void @amdgcn.device.fini() #1 {
 ; CHECK-NEXT: store volatile i32 0, ptr addrspace(1) null
-; CHECK-NEXT: call void @bar()
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -9,22 +9,44 @@
 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
 
+; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
 ; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
 
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
-; CHECK-NEXT: call void @foo
-; CHECK-NEXT: ret void
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
-; CHECK-NEXT: call void @bar
-; CHECK-NEXT: ret void
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init() #0
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK: while.entry:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: call void [[CALLBACK]]()
+; CHECK-NEXT: [[NEXT]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
+; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK: while.end:
+; CHECK-NEXT: ret void
+
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini() #1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK: while.entry:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: call void [[CALLBACK]]()
+; CHECK-NEXT: [[NEXT]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK: while.end:
+; CHECK-NEXT: ret void
 
 ; CHECK-NOT: amdgcn.device.
 
-; VISIBILITY: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.init
-; VISIBILITY: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
-; VISIBILITY: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini
-; VISIBILITY: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
+; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init
+; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
+; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.fini
+; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.fini.kd
 
 ; SECTION: .init_array.1 INIT_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
 ; SECTION: .fini_array.1 FINI_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -4,18 +4,42 @@
 @llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }]
 
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
-; CHECK-NEXT: call void @foo
-; CHECK-NEXT: call void @foo.5
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
-; CHECK-NEXT: call void @bar
-; CHECK-NEXT: call void @bar.5
-
-; CHECK-VIS: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.init
-; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
-; CHECK-VIS: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini
-; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
+; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
+
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init() #0
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK: while.entry:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: call void [[CALLBACK]]()
+; CHECK-NEXT: [[NEXT]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
+; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK: while.end:
+; CHECK-NEXT: ret void
+
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini() #1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK: while.entry:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: call void [[CALLBACK]]()
+; CHECK-NEXT: [[NEXT]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK: while.end:
+; CHECK-NEXT: ret void
+
+; CHECK-VIS: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init
+; CHECK-VIS: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
+; CHECK-VIS: FUNC WEAK PROTECTED {{.*}} amdgcn.device.fini
+; CHECK-VIS: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.fini.kd
 
 define internal void @foo() {
   ret void
@@ -34,4 +58,4 @@
 }
 
 ; CHECK: attributes #0 = { "device-init" }
-; CHECK: attributes #1 = { "device-fini" }
\ No newline at end of file
+; CHECK: attributes #1 = { "device-fini" }