diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -240,6 +240,14 @@
       if (F.isDeclaration())
         continue;
 
+      // Skip propagating attributes and features to
+      // address taken functions.
+      if (F.hasAddressTaken()) {
+        if (!Roots.count(&F))
+          NewRoots.insert(&F);
+        continue;
+      }
+
       const FnProperties CalleeProps(*TM, F);
       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
       SmallSet<CallBase *, 32> Visited;
@@ -259,7 +267,11 @@
 
         const FnProperties CallerProps(*TM, *Caller);
 
-        if (CalleeProps == CallerProps) {
+        // Convergence is allowed if the caller has its address
+        // taken, because a callee's attributes and features may
+        // not all agree when the callee is the target of more
+        // than one function (called directly or indirectly).
+        if (Caller->hasAddressTaken() || CalleeProps == CallerProps) {
           if (!Roots.count(&F))
             NewRoots.insert(&F);
           continue;
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll
@@ -0,0 +1,59 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early  %s | FileCheck %s
+
+; Complicated call graph where a function is called
+; directly from a kernel and also from a function
+; whose address is taken.
+
+; CHECK-LABEL: define float @common_callee.gc() #0 {
+define float @common_callee.gc() {
+   ret float 0.0
+}
+
+; CHECK-LABEL: define float @foo() {
+define float @foo() {
+   ret float 0.0
+}
+
+; CHECK-LABEL: define float @bar() {
+define float @bar() {
+   ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz() {
+define float @baz() {
+   ret float 0.0
+}
+
+define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
+  %fn = alloca float ()*
+  switch i32 %type, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+
+sw.bb:
+  store float ()* @foo, float ()** %fn
+  br label %sw.epilog
+
+sw.bb2:
+  store float ()* @bar, float ()** %fn
+  br label %sw.epilog
+
+sw.bb3:
+  store float ()* @baz, float ()** %fn
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  %fp = load float ()*, float ()** %fn
+  %direct_call = call contract float @common_callee.gc()
+  %indirect_call = call contract float %fp()
+  store float %indirect_call, float* %result
+  ret void
+}
+
+attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "target-features"="-wavefrontsize16,-wavefrontsize32,+wavefrontsize64" }
+attributes #1 = { "amdgpu-flat-work-group-size"="1,256" } 
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early  %s | FileCheck %s
+
+; Test to check that we skip propagating attributes even if
+; a function is called directly as well as
+; indirectly. "baz" is called directly as well as indirectly.
+
+; CHECK-LABEL: define float @foo() {
+define float @foo() {
+  ret float 0.0
+}
+
+; CHECK-LABEL: define float @bar() {
+define float @bar() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz() {
+define float @baz() {
+ ret float 0.0
+}
+
+define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
+  %fn = alloca float ()*
+  switch i32 %type, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+
+sw.bb:
+  store float ()* @foo, float ()** %fn
+  br label %sw.epilog
+
+sw.bb2:
+  store float ()* @bar, float ()** %fn
+  br label %sw.epilog
+
+sw.bb3:
+  store float ()* @baz, float ()** %fn
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  %fp = load float ()*, float ()** %fn
+  %direct_call = call contract float @baz()
+  %indirect_call = call contract float %fp()
+  store float %indirect_call, float* %result
+  ret void
+}
+
+attributes #1 = { "amdgpu-flat-work-group-size"="1,256" } 
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early  %s | FileCheck %s
+
+; Test to check that we skip propagating attributes to
+; address-taken functions but still propagate to direct callees.
+
+; CHECK-LABEL: define float @foo() {
+define float @foo() {
+  ret float 0.0
+}
+
+; CHECK-LABEL: define float @bar() {
+define float @bar() {
+  ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz() {
+define float @baz() {
+  ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz2() #0 {
+define float @baz2() {
+  ret float 0.0
+}
+
+define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
+  %fn = alloca float ()*
+  switch i32 %type, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+
+sw.bb:
+  store float ()* @foo, float ()** %fn
+  br label %sw.epilog
+
+sw.bb2:
+  store float ()* @bar, float ()** %fn
+  br label %sw.epilog
+
+sw.bb3:
+  store float ()* @baz, float ()** %fn
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  %fp = load float ()*, float ()** %fn
+  %direct_call = call contract float @baz2()
+  %indirect_call = call contract float %fp()
+  store float %indirect_call, float* %result
+  ret void
+}
+
+attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "target-features"="-wavefrontsize16,-wavefrontsize32,+wavefrontsize64" }
+attributes #1 = { "amdgpu-flat-work-group-size"="1,256"} 
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early  %s | FileCheck %s
+
+; Test to check that we skip propagating attributes to
+; address-taken functions in a simple call graph.
+
+; CHECK-LABEL: define float @foo() {
+define float @foo() {
+  ret float 0.0
+}
+
+; CHECK-LABEL: define float @bar() {
+define float @bar() {
+  ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz() {
+define float @baz() {
+  ret float 0.0
+}
+
+define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
+  %fn = alloca float ()*
+  switch i32 %type, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+
+sw.bb:
+  store float ()* @foo, float ()** %fn
+  br label %sw.epilog
+
+sw.bb2:
+  store float ()* @bar, float ()** %fn
+  br label %sw.epilog
+
+sw.bb3:
+  store float ()* @baz, float ()** %fn
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  %fp = load float ()*, float ()** %fn
+  %indirect_call = call contract float %fp()
+  store float %indirect_call, float* %result
+  ret void
+}
+
+attributes #1 = { "amdgpu-flat-work-group-size"="1,256" } 
+