diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -395,10 +395,17 @@
   bool isExternallyDestructed() const { return IsExternallyDestructed; }
 };
 
-/// Helper to add attributes to \p F according to the CodeGenOptions and
+/// If \p F "target-features" are incompatible with the \p TargetOpts features,
+/// it is correct to drop the function. \return true if \p F is dropped
+bool dropFunctionWithIncompatibleAttributes(llvm::Function &F,
+                                            const TargetOptions &TargetOpts);
+
 /// LangOptions without requiring a CodeGenModule to be constructed.
+/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as
+/// though we had emitted it ourselves. We remove any attributes on F that
+/// conflict with the attributes we add here.
 void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F,
-                                              const CodeGenOptions CodeGenOpts,
+                                              const CodeGenOptions &CodeGenOpts,
                                               const LangOptions &LangOpts,
                                               const TargetOptions &TargetOpts,
                                               bool WillInternalize);
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2002,10 +2002,41 @@
   }
 }
 
-/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as
-/// though we had emitted it ourselves. We remove any attributes on F that
-/// conflict with the attributes we add here.
-static void mergeDefaultFunctionDefinitionAttributes(
+static void
+overrideFunctionFeaturesWithTargetFeatures(llvm::AttrBuilder &FuncAttr,
+                                           const llvm::Function &F,
+                                           const TargetOptions &TargetOpts) {
+  auto FFeatures = F.getFnAttribute("target-features");
+
+  SmallVector<StringRef> MergedFeatures(TargetOpts.Features.begin(),
+                                        TargetOpts.Features.end());
+
+  if (FFeatures.isValid()) {
+    const auto &TFeatures = TargetOpts.FeatureMap;
+    for (StringRef Feature : llvm::split(FFeatures.getValueAsString(), ',')) {
+      bool EnabledForFunc = Feature[0] == '+';
+      StringRef Name = Feature.substr(1);
+      auto TEntry = TFeatures.find(Name);
+
+      // if the feature is not set for the target-opts, it must be preserved
+      if (TEntry == TFeatures.end()) {
+        MergedFeatures.push_back(Feature);
+        continue;
+      }
+
+      // if the feature is enabled for one and disabled for the other, they are
+      // not compatible
+      bool EnabledForTarget = TEntry->second;
+      if (EnabledForTarget != EnabledForFunc)
+        return;
+    }
+  }
+
+  if (!MergedFeatures.empty())
+    FuncAttr.addAttribute("target-features", llvm::join(MergedFeatures, ","));
+}
+
+void CodeGen::mergeDefaultFunctionDefinitionAttributes(
     llvm::Function &F, const CodeGenOptions &CodeGenOpts,
     const LangOptions &LangOpts, const TargetOptions &TargetOpts,
     bool WillInternalize) {
@@ -2062,16 +2093,30 @@
 
   F.removeFnAttrs(AttrsToRemove);
   addDenormalModeAttrs(Merged, MergedF32, FuncAttrs);
+
+  overrideFunctionFeaturesWithTargetFeatures(FuncAttrs, F, TargetOpts);
+
   F.addFnAttrs(FuncAttrs);
 }
 
-void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes(
-    llvm::Function &F, const CodeGenOptions CodeGenOpts,
-    const LangOptions &LangOpts, const TargetOptions &TargetOpts,
-    bool WillInternalize) {
+bool CodeGen::dropFunctionWithIncompatibleAttributes(
+    llvm::Function &F, const TargetOptions &TargetOpts) {
+  auto FFeatures = F.getFnAttribute("target-features");
+  if (!FFeatures.isValid())
+    return false;
 
-  ::mergeDefaultFunctionDefinitionAttributes(F, CodeGenOpts, LangOpts,
-                                             TargetOpts, WillInternalize);
+  const auto &TFeatures = TargetOpts.FeatureMap;
+  for (StringRef Feature : llvm::split(FFeatures.getValueAsString(), ',')) {
+    bool EnabledForFunc = Feature[0] == '+';
+    StringRef Name = Feature.substr(1);
+    auto TEntry = TFeatures.find(Name);
+    if (TEntry != TFeatures.end() && TEntry->second != EnabledForFunc) {
+      F.replaceAllUsesWith(llvm::ConstantPointerNull::get(F.getType()));
+      F.eraseFromParent();
+      return true;
+    }
+  }
+  return false;
 }
 
 void CodeGenModule::getTrivialDefaultFunctionAttributes(
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -266,15 +266,20 @@
     bool LinkInModules(llvm::Module *M) {
       for (auto &LM : LinkModules) {
         assert(LM.Module && "LinkModule does not actually have a module");
-        if (LM.PropagateAttrs)
-          for (Function &F : *LM.Module) {
+        if (LM.PropagateAttrs) {
+          for (Function &F : llvm::make_early_inc_range(*LM.Module)) {
             // Skip intrinsics. Keep consistent with how intrinsics are created
             // in LLVM IR.
             if (F.isIntrinsic())
               continue;
+
+            if (CodeGen::dropFunctionWithIncompatibleAttributes(F, TargetOpts))
+              continue;
+
             CodeGen::mergeDefaultFunctionDefinitionAttributes(
                 F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize);
           }
+        }
 
         CurLinkModule = LM.Module.get();
 
diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c
--- a/clang/test/CodeGen/link-builtin-bitcode.c
+++ b/clang/test/CodeGen/link-builtin-bitcode.c
@@ -1,42 +1,47 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2
+// Build two version of the bitcode library, one with a target-cpu set and one without
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -DBITCODE -emit-llvm-bc -o %t-lib.no-cpu.bc %s
+
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
 // RUN:   -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
 
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
+// RUN:   -mlink-builtin-bitcode %t-lib.no-cpu.bc -o - %t.bc | FileCheck %s
+
 #ifdef BITCODE
-int foo(void) { return 42; }
+int no_attr(void) { return 42; }
+int __attribute__((target("gfx8-insts"))) attr_in_target(void) { return 42; }
+int __attribute__((target("extended-image-insts"))) attr_not_in_target(void) { return 42; }
+int __attribute__((target("no-gfx9-insts"))) attr_incompatible(void) { return 42; }
 int x = 12;
 #endif
 
-extern int foo(void);
+extern int no_attr(void);
+extern int attr_in_target(void);
+extern int attr_not_in_target(void);
+extern int attr_incompatible(void);
 extern int x;
 
-int bar() { return foo() + x; }
-//.
+int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_incompatible() + x; }
+
 // CHECK: @x = internal addrspace(1) global i32 12, align 4
-//.
-// CHECK: Function Attrs: noinline nounwind optnone
+
 // CHECK-LABEL: define dso_local i32 @bar
-// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo()
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4
-// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]]
-// CHECK-NEXT:    ret i32 [[ADD]]
+// CHECK-SAME: () #[[ATTR_BAR:[0-9]+]] {
 //
-//
-// CHECK: Function Attrs: convergent noinline nounwind optnone
-// CHECK-LABEL: define internal i32 @foo
-// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT:    ret i32 42
-//
-//.
-// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-//.
+// CHECK-LABEL: define internal i32 @no_attr
+// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] {
+
+// CHECK-LABEL: define internal i32 @attr_in_target
+// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] {
+
+// CHECK-LABEL: define internal i32 @attr_not_in_target
+// CHECK-SAME: () #[[ATTR_EXTEND:[0-9]+]] {
+
+// CHECK-NOT: @attr_incompatible
+
+// CHECK: attributes #[[ATTR_BAR]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_COMPATIBLE]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_EXTEND]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+extended-image-insts" }
diff --git a/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu b/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
--- a/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
+++ b/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
@@ -31,7 +31,7 @@
 
 
 // CHECK: define {{.*}} i64 @do_intrin_stuff() #[[ATTR:[0-9]+]]
-// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="+gfx11-insts"
+// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="{{.*}}+gfx11-insts{{.*}}"
 // NOINTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-features"="+gfx11-insts"
 
 #define __device__ __attribute__((device))