Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -353,7 +353,6 @@ } bool NeedsQueuePtr = false; - bool HasCall = false; for (Function *Callee : AAEdges.getOptimisticEdges()) { Intrinsic::ID IID = Callee->getIntrinsicID(); if (IID != Intrinsic::not_intrinsic) { @@ -367,7 +366,6 @@ continue; } - HasCall = true; const AAAMDAttributes &AAAMD = A.getAAFor( *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); const DenseSet &CalleeAttributes = AAAMD.getAttributes(); @@ -377,20 +375,6 @@ AddAttribute(AttrName); } - HasCall |= AAEdges.hasUnknownCallee(); - if (!IsNonEntryFunc && HasCall) - AddAttribute("amdgpu-calls"); - - // Check the function body. - auto CheckAlloca = [&](Instruction &I) { - AddAttribute("amdgpu-stack-objects"); - return false; - }; - - bool UsedAssumedInformation = false; - A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca}, - UsedAssumedInformation); - // If we found that we need amdgpu-queue-ptr, nothing else to do. if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) { AddAttribute("amdgpu-queue-ptr"); @@ -412,10 +396,12 @@ // instructions, try it first. // amdgpu-queue-ptr is not needed if aperture regs is present. - if (!HasApertureRegs) + if (!HasApertureRegs) { + bool UsedAssumedInformation = false; A.checkForAllInstructions(CheckAddrSpaceCasts, *this, {Instruction::AddrSpaceCast}, UsedAssumedInformation); + } // If we found that we need amdgpu-queue-ptr, nothing else to do. if (NeedsQueuePtr) { Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -212,7 +212,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR21:[0-9]+]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR20:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_x() @@ -227,7 +227,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x ; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_x() @@ -242,7 +242,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_y() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_y() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_y() @@ -257,7 +257,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z ; ATTRIBUTOR_HSA-SAME: () #[[ATTR3]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_z() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_z() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_z() @@ -272,7 +272,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x ; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_x() @@ -287,7 +287,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x ; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_x() @@ -302,7 +302,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_y() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_y() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_y() @@ -317,7 +317,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z ; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_z() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_z() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_z() @@ -332,7 +332,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] { -; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_workgroup_id_y() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_workgroup_id_y() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @func_indirect_use_workgroup_id_y() @@ -347,7 +347,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] { -; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @func_indirect_indirect_use_workgroup_id_y() @@ -362,7 +362,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr ; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_ptr() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_ptr() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_dispatch_ptr() @@ -377,7 +377,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr ; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_queue_ptr() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_queue_ptr() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_queue_ptr() @@ -392,7 +392,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id ; ATTRIBUTOR_HSA-SAME: () #[[ATTR9]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_id() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_id() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_dispatch_id() @@ -425,7 +425,7 @@ ; ATTRIBUTOR_HSA-SAME: () #[[ATTR12:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() ; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* null, align 4 -; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR22:[0-9]+]] +; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR21:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: unreachable ; %val = call i32 @llvm.amdgcn.workitem.id.y() @@ -442,7 +442,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y ; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { -; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR21]] ; ATTRIBUTOR_HSA-NEXT: unreachable ; call void @recursive_use_workitem_id_y() @@ -498,7 +498,7 @@ ; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR14:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 -; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_queue_ptr() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_queue_ptr() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* @@ -515,7 +515,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast ; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) @@ -530,7 +530,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9 ; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) @@ -545,7 +545,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9 ; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) @@ -577,7 +577,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr ; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_kernarg_segment_ptr() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_kernarg_segment_ptr() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_kernarg_segment_ptr() @@ -628,7 +628,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr ; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { -; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_implicitarg_ptr() @@ -654,7 +654,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external ; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @external.func() @@ -682,7 +682,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm ; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { -; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void asm sideeffect "", ""() #3 @@ -696,8 +696,8 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external -; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR21]] +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @external.func() @@ -726,7 +726,7 @@ ; AKF_HSA-NEXT: ret i32 0 ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type -; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() ; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* null, align 8 ; ATTRIBUTOR_HSA-NEXT: ret i32 0 @@ -744,7 +744,7 @@ ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR20]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -782,7 +782,7 @@ ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_extern_call ; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { -; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @extern() #[[ATTR21]] +; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @extern() #[[ATTR20]] ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] ; @@ -856,8 +856,7 @@ ; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind } -; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { noreturn nounwind } +; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind } +; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { noreturn nounwind } ;. Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -448,8 +448,9 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca -; ATTRIBUTOR_HSA-SAME: () #[[ATTR12:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; ATTRIBUTOR_HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; %alloca = alloca i32, addrspace(5) @@ -468,11 +469,12 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { ; ATTRIBUTOR_HSA-NEXT: entry: ; ATTRIBUTOR_HSA-NEXT: br label [[BB:%.*]] ; ATTRIBUTOR_HSA: bb: ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; ATTRIBUTOR_HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; entry: @@ -492,8 +494,9 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; ATTRIBUTOR_HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; %alloca = alloca i32, addrspace(5) @@ -531,5 +534,4 @@ ; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-stack-objects" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -20,7 +20,7 @@ ; AKF_GCN-NEXT: ret void ; ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@direct -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { ; ATTRIBUTOR_GCN-NEXT: call void @indirect() ; ATTRIBUTOR_GCN-NEXT: ret void ; @@ -32,10 +32,15 @@ } define amdgpu_kernel void @test_direct_indirect_call() { -; GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call -; GCN-SAME: () #[[ATTR2:[0-9]+]] { -; GCN-NEXT: call void @direct() -; GCN-NEXT: ret void +; AKF_GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call +; AKF_GCN-SAME: () #[[ATTR2:[0-9]+]] { +; AKF_GCN-NEXT: call void @direct() +; AKF_GCN-NEXT: ret void +; +; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_GCN-NEXT: call void @direct() +; ATTRIBUTOR_GCN-NEXT: ret void ; call void @direct() ret void @@ -46,6 +51,4 @@ ; AKF_GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -23,8 +23,11 @@ ; AKF_GCN-NEXT: ret void ; ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: call void @indirect() +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 +; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 +; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 +; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() ; ATTRIBUTOR_GCN-NEXT: ret void ; ; CHECK-LABEL: define {{[^@]+}}@test_simple_indirect_call @@ -48,5 +51,4 @@ ; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -53,8 +53,12 @@ ; AKF_GCN-NEXT: ret void ; ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: call void @indirect() +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5) +; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast void ()* addrspace(5)* [[FPTR]] to void ()** +; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() ; ATTRIBUTOR_GCN-NEXT: ret void ; %fptr = alloca void()*, addrspace(5) @@ -70,5 +74,4 @@ ; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -41,6 +41,6 @@ ; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -148,7 +148,7 @@ ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nofree nosync nounwind "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind } Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -63,6 +63,6 @@ ; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -59,7 +59,7 @@ ; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind writeonly } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -73,9 +73,9 @@ ; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind writeonly } ; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -150,7 +150,7 @@ ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone "uniform-work-group-size"="true" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind readnone } ; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nofree nounwind readnone } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -83,6 +83,6 @@ ; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly } ;.