Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -466,6 +466,7 @@
 
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const {
+    // If there is no target machine, be very conservative.
     return (Caller->getFnAttribute("target-cpu") ==
             Callee->getFnAttribute("target-cpu")) &&
            (Caller->getFnAttribute("target-features") ==
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -1089,6 +1089,24 @@
     return 10;
   }
 
+  bool areInlineCompatible(const Function *Caller,
+                           const Function *Callee) const {
+    const TargetMachine &TM = getTLI()->getTargetMachine();
+
+    const FeatureBitset &CallerBits =
+        TM.getSubtargetImpl(*Caller)->getFeatureBits();
+    const FeatureBitset &CalleeBits =
+        TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+    // Inline a callee if its target-features are a subset of the callers
+    // target-features.
+    //
+    // Targets can override if this is too limiting by including subtarget
+    // features that we might not care about for inlining, but it is
+    // conservatively correct.
+    return (CallerBits & CalleeBits) == CalleeBits;
+  }
+
   unsigned getNumberOfParts(Type *Tp) {
     std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
     return LT.first;
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -65,9 +65,6 @@
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
 
-  bool areInlineCompatible(const Function *Caller,
-                           const Function *Callee) const;
-
   /// \name Scalar TTI Implementations
   /// @{
 
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -23,20 +23,6 @@
 static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
                                                cl::init(true), cl::Hidden);
 
-bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
-                                         const Function *Callee) const {
-  const TargetMachine &TM = getTLI()->getTargetMachine();
-
-  const FeatureBitset &CallerBits =
-      TM.getSubtargetImpl(*Caller)->getFeatureBits();
-  const FeatureBitset &CalleeBits =
-      TM.getSubtargetImpl(*Callee)->getFeatureBits();
-
-  // Inline a callee if its target-features are a subset of the callers
-  // target-features.
-  return (CallerBits & CalleeBits) == CalleeBits;
-}
-
 /// \brief Calculate the cost of materializing a 64-bit value. This helper
 /// method might only calculate a fraction of a larger immediate. Therefore it
 /// is valid to return a cost of ZERO.
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -109,8 +109,6 @@
   bool isLegalMaskedStore(Type *DataType);
   bool isLegalMaskedGather(Type *DataType);
   bool isLegalMaskedScatter(Type *DataType);
-  bool areInlineCompatible(const Function *Caller,
-                           const Function *Callee) const;
   bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
   bool enableInterleavedAccessVectorization();
 private:
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2221,22 +2221,6 @@
   return isLegalMaskedGather(DataType);
 }
 
-bool X86TTIImpl::areInlineCompatible(const Function *Caller,
-                                     const Function *Callee) const {
-  const TargetMachine &TM = getTLI()->getTargetMachine();
-
-  // Work this as a subsetting of subtarget features.
-  const FeatureBitset &CallerBits =
-      TM.getSubtargetImpl(*Caller)->getFeatureBits();
-  const FeatureBitset &CalleeBits =
-      TM.getSubtargetImpl(*Callee)->getFeatureBits();
-
-  // FIXME: This is likely too limiting as it will include subtarget features
-  // that we might not care about for inlining, but it is conservatively
-  // correct.
-  return (CallerBits & CalleeBits) == CalleeBits;
-}
-
 bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
   // TODO: We can increase these based on available vector ops.
   MaxLoadSize = ST->is64Bit() ? 8 : 4;
Index: test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
@@ -0,0 +1,49 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
+
+; CHECK-LABEL: @func_no_target_cpu(
+define i32 @func_no_target_cpu() #0 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_call_no_target_cpu() #1 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_features_call_no_target_cpu() #2 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @fp32_denormals(
+define i32 @fp32_denormals() #3 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals(
+; CHECK-NEXT: call i32 @fp32_denormals()
+define i32 @no_fp32_denormals_call_f32_denormals() #4 {
+  %call = call i32 @fp32_denormals()
+  ret i32 %call
+}
+
+; Make sure gfx9 can call unspecified functions because of movrel
+; feature change.
+; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @gfx9_target_features_call_no_target_cpu() #5 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals" }
+attributes #4 = { nounwind "target-features"="-fp32-denormals" }
+attributes #5 = { nounwind "target-cpu"="gfx900" }
Index: test/Transforms/Inline/AMDGPU/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/Inline/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
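Both deleted target overrides and the new default in BasicTTIImpl.h apply the same rule: the callee may be inlined only if every subtarget feature it requires is also enabled in the caller, while the TargetTransformInfoImpl.h fallback (used when no target machine is available) requires the target-cpu and target-features attribute strings to match exactly. The sketch below is illustration only, not part of the patch: it uses std::bitset as a stand-in for llvm::FeatureBitset and hypothetical function names.

// Illustration only: mimics the subset test used by the patch, with
// std::bitset standing in for llvm::FeatureBitset. Names are hypothetical.
#include <bitset>
#include <cassert>
#include <string>

using Features = std::bitset<8>; // stand-in for FeatureBitset

// BasicTTIImpl-style rule: the callee is inlinable if its features are a
// subset of the caller's features.
bool areInlineCompatible(const Features &CallerBits,
                         const Features &CalleeBits) {
  return (CallerBits & CalleeBits) == CalleeBits;
}

// TargetTransformInfoImpl-style fallback when no target machine is
// available: require the attribute strings to match exactly.
bool areInlineCompatibleConservative(const std::string &CallerCPU,
                                     const std::string &CalleeCPU,
                                     const std::string &CallerFeatures,
                                     const std::string &CalleeFeatures) {
  return CallerCPU == CalleeCPU && CallerFeatures == CalleeFeatures;
}

int main() {
  Features Caller("00001111"); // caller enables four features
  Features Callee("00000011"); // callee needs two of them
  assert(areInlineCompatible(Caller, Callee));  // subset -> inlinable

  Features Other("00010000");  // callee needs a feature the caller lacks
  assert(!areInlineCompatible(Caller, Other));  // not a subset -> keep the call

  // Conservative fallback: only identical attribute strings are compatible.
  assert(areInlineCompatibleConservative("fiji", "fiji",
                                         "+fp32-denormals", "+fp32-denormals"));
  assert(!areInlineCompatibleConservative("fiji", "gfx900", "", ""));
  return 0;
}

Expressing the check as (CallerBits & CalleeBits) == CalleeBits keeps the default conservatively correct: a caller missing any feature the callee was built with blocks inlining, and targets whose feature bits include properties irrelevant to inlining can still override the hook.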