diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -236,6 +236,14 @@ if (F.isDeclaration()) continue; + // Skip propagating attributes and features to + // address taken functions. + if (F.hasAddressTaken()) { + if (!Roots.count(&F)) + NewRoots.insert(&F); + continue; + } + const FnProperties CalleeProps(*TM, F); SmallVector, 32> ToReplace; SmallSet Visited; @@ -255,7 +263,11 @@ const FnProperties CallerProps(*TM, *Caller); - if (CalleeProps == CallerProps) { + // Convergence is allowed if the caller has its + // address taken because all callee's (attributes + features) + // may not agree as the callee may be the target of + // more than one function (called directly or indirectly). + if (Caller->hasAddressTaken() || CalleeProps == CallerProps) { if (!Roots.count(&F)) NewRoots.insert(&F); continue; diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll @@ -0,0 +1,59 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early %s | FileCheck %s + +; Complicated call graph where a function is called +; directly from a kernel and also from a function +; whose address is taken. 
+ +; CHECK-LABEL: define float @common_callee.gc() #0 { +define float @common_callee.gc() { + ret float 0.0 +} + +; CHECK-LABEL: define float @foo() { +define float @foo() { + ret float 0.0 +} + +; CHECK-LABEL: define float @bar() { +define float @bar() { + ret float 0.0 +} + +; CHECK-LABEL: define float @baz() { +define float @baz() { + ret float 0.0 +} + +define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 { + %fn = alloca float ()* + switch i32 %type, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: + store float ()* @foo, float ()** %fn + br label %sw.epilog + +sw.bb2: + store float ()* @bar, float ()** %fn + br label %sw.epilog + +sw.bb3: + store float ()* @baz, float ()** %fn + br label %sw.epilog + +sw.default: + br label %sw.epilog + +sw.epilog: + %fp = load float ()*, float ()** %fn + %direct_call = call contract float @common_callee.gc() + %indirect_call = call contract float %fp() + store float %indirect_call, float* %result + ret void +} + +attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "target-features"="-wavefrontsize16,-wavefrontsize32,+wavefrontsize64" } +attributes #1 = { "amdgpu-flat-work-group-size"="1,256" } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll @@ -0,0 +1,53 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early %s | FileCheck %s + +; Test to check if we skip propagating attributes even if +; a function is called directly as well as +; indirectly. "baz" is called directly as well as indirectly. 
+ +; CHECK-LABEL: define float @foo() { +define float @foo() { + ret float 0.0 +} + +; CHECK-LABEL: define float @bar() { +define float @bar() { + ret float 0.0 +} + +; CHECK-LABEL: define float @baz() { +define float @baz() { + ret float 0.0 +} + +define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 { + %fn = alloca float ()* + switch i32 %type, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: + store float ()* @foo, float ()** %fn + br label %sw.epilog + +sw.bb2: + store float ()* @bar, float ()** %fn + br label %sw.epilog + +sw.bb3: + store float ()* @baz, float ()** %fn + br label %sw.epilog + +sw.default: + br label %sw.epilog + +sw.epilog: + %fp = load float ()*, float ()** %fn + %direct_call = call contract float @baz() + %indirect_call = call contract float %fp() + store float %indirect_call, float* %result + ret void +} + +attributes #1 = { "amdgpu-flat-work-group-size"="1,256" } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll @@ -0,0 +1,58 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early %s | FileCheck %s + +; Test to check if we skip attributes on address +; taken functions but pass to direct callees. 
+ +; CHECK-LABEL: define float @foo() { +define float @foo() { + ret float 0.0 +} + +; CHECK-LABEL: define float @bar() { +define float @bar() { + ret float 0.0 +} + +; CHECK-LABEL: define float @baz() { +define float @baz() { + ret float 0.0 +} + +; CHECK-LABEL: define float @baz2() #0 { +define float @baz2() { + ret float 0.0 +} + +define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 { + %fn = alloca float ()* + switch i32 %type, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: + store float ()* @foo, float ()** %fn + br label %sw.epilog + +sw.bb2: + store float ()* @bar, float ()** %fn + br label %sw.epilog + +sw.bb3: + store float ()* @baz, float ()** %fn + br label %sw.epilog + +sw.default: + br label %sw.epilog + +sw.epilog: + %fp = load float ()*, float ()** %fn + %direct_call = call contract float @baz2() + %indirect_call = call contract float %fp() + store float %indirect_call, float* %result + ret void +} + +attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "target-features"="-wavefrontsize16,-wavefrontsize32,+wavefrontsize64" } +attributes #1 = { "amdgpu-flat-work-group-size"="1,256"} diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll @@ -0,0 +1,52 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early %s | FileCheck %s + +; Test to check if we skip attributes on address +; taken functions in a simple call graph. 
+ +; CHECK-LABEL: define float @foo() { +define float @foo() { + ret float 0.0 +} + +; CHECK-LABEL: define float @bar() { +define float @bar() { + ret float 0.0 +} + +; CHECK-LABEL: define float @baz() { +define float @baz() { + ret float 0.0 +} + +define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 { + %fn = alloca float ()* + switch i32 %type, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: + store float ()* @foo, float ()** %fn + br label %sw.epilog + +sw.bb2: + store float ()* @bar, float ()** %fn + br label %sw.epilog + +sw.bb3: + store float ()* @baz, float ()** %fn + br label %sw.epilog + +sw.default: + br label %sw.epilog + +sw.epilog: + %fp = load float ()*, float ()** %fn + %indirect_call = call contract float %fp() + store float %indirect_call, float* %result + ret void +} + +attributes #1 = { "amdgpu-flat-work-group-size"="1,256" } +