Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -466,6 +466,7 @@
 
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const {
+    // If there is no target machine, be very conservative.
     return (Caller->getFnAttribute("target-cpu") ==
             Callee->getFnAttribute("target-cpu")) &&
            (Caller->getFnAttribute("target-features") ==
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -1089,6 +1089,24 @@
     return 10;
   }
 
+  bool areInlineCompatible(const Function *Caller,
+                           const Function *Callee) const {
+    const TargetMachine &TM = getTLI()->getTargetMachine();
+
+    const FeatureBitset &CallerBits =
+        TM.getSubtargetImpl(*Caller)->getFeatureBits();
+    const FeatureBitset &CalleeBits =
+        TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+    // Inline a callee if its target-features are a subset of the callers
+    // target-features.
+    //
+    // Targets can override if this is too limiting by including subtarget
+    // features that we might not care about for inlining, but it is
+    // conservatively correct.
+    return (CallerBits & CalleeBits) == CalleeBits;
+  }
+
   unsigned getNumberOfParts(Type *Tp) {
     std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
     return LT.first;
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -65,9 +65,6 @@
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
 
-  bool areInlineCompatible(const Function *Caller,
-                           const Function *Callee) const;
-
   /// \name Scalar TTI Implementations
   /// @{
 
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -23,20 +23,6 @@
 static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
                                                cl::init(true), cl::Hidden);
 
-bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
-                                         const Function *Callee) const {
-  const TargetMachine &TM = getTLI()->getTargetMachine();
-
-  const FeatureBitset &CallerBits =
-      TM.getSubtargetImpl(*Caller)->getFeatureBits();
-  const FeatureBitset &CalleeBits =
-      TM.getSubtargetImpl(*Callee)->getFeatureBits();
-
-  // Inline a callee if its target-features are a subset of the callers
-  // target-features.
-  return (CallerBits & CalleeBits) == CalleeBits;
-}
-
 /// \brief Calculate the cost of materializing a 64-bit value. This helper
 /// method might only calculate a fraction of a larger immediate. Therefore it
 /// is valid to return a cost of ZERO.
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -109,8 +109,6 @@
   bool isLegalMaskedStore(Type *DataType);
   bool isLegalMaskedGather(Type *DataType);
   bool isLegalMaskedScatter(Type *DataType);
-  bool areInlineCompatible(const Function *Caller,
-                           const Function *Callee) const;
   bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
   bool enableInterleavedAccessVectorization();
 private:
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2221,22 +2221,6 @@
   return isLegalMaskedGather(DataType);
 }
 
-bool X86TTIImpl::areInlineCompatible(const Function *Caller,
-                                     const Function *Callee) const {
-  const TargetMachine &TM = getTLI()->getTargetMachine();
-
-  // Work this as a subsetting of subtarget features.
-  const FeatureBitset &CallerBits =
-      TM.getSubtargetImpl(*Caller)->getFeatureBits();
-  const FeatureBitset &CalleeBits =
-      TM.getSubtargetImpl(*Callee)->getFeatureBits();
-
-  // FIXME: This is likely too limiting as it will include subtarget features
-  // that we might not care about for inlining, but it is conservatively
-  // correct.
-  return (CallerBits & CalleeBits) == CalleeBits;
-}
-
 bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
   // TODO: We can increase these based on available vector ops.
   MaxLoadSize = ST->is64Bit() ? 8 : 4;
Index: test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
@@ -0,0 +1,49 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
+
+; CHECK-LABEL: @func_no_target_cpu(
+define i32 @func_no_target_cpu() #0 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_call_no_target_cpu() #1 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_features_call_no_target_cpu() #2 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @fp32_denormals(
+define i32 @fp32_denormals() #3 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals(
+; CHECK-NEXT: call i32 @fp32_denormals()
+define i32 @no_fp32_denormals_call_f32_denormals() #4 {
+  %call = call i32 @fp32_denormals()
+  ret i32 %call
+}
+
+; Make sure gfx9 can call unspecified functions because of movrel
+; feature change.
+; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @gfx9_target_features_call_no_target_cpu() #5 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals" }
+attributes #4 = { nounwind "target-features"="-fp32-denormals" }
+attributes #5 = { nounwind "target-cpu"="gfx900" }
Index: test/Transforms/Inline/AMDGPU/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/Inline/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
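Both deleted target overrides and the new default in BasicTTIImpl.h apply the same rule: the callee may be inlined only if every subtarget feature it requires is also enabled in the caller, while the TargetTransformInfoImpl.h fallback (used when no target machine is available) requires the target-cpu and target-features attribute strings to match exactly. The sketch below is illustration only, not part of the patch: it uses std::bitset as a stand-in for llvm::FeatureBitset and hypothetical function names.

// Illustration only: mimics the subset test used by the patch, with
// std::bitset standing in for llvm::FeatureBitset. Names are hypothetical.
#include <bitset>
#include <cassert>
#include <string>

using Features = std::bitset<8>; // stand-in for FeatureBitset

// BasicTTIImpl-style rule: the callee is inlinable if its features are a
// subset of the caller's features.
bool areInlineCompatible(const Features &CallerBits,
                         const Features &CalleeBits) {
  return (CallerBits & CalleeBits) == CalleeBits;
}

// TargetTransformInfoImpl-style fallback when no target machine is
// available: require the attribute strings to match exactly.
bool areInlineCompatibleConservative(const std::string &CallerCPU,
                                     const std::string &CalleeCPU,
                                     const std::string &CallerFeatures,
                                     const std::string &CalleeFeatures) {
  return CallerCPU == CalleeCPU && CallerFeatures == CalleeFeatures;
}

int main() {
  Features Caller("00001111"); // caller enables four features
  Features Callee("00000011"); // callee needs two of them
  assert(areInlineCompatible(Caller, Callee));  // subset -> inlinable

  Features Other("00010000");  // callee needs a feature the caller lacks
  assert(!areInlineCompatible(Caller, Other));  // not a subset -> keep the call

  // Conservative fallback: only identical attribute strings are compatible.
  assert(areInlineCompatibleConservative("fiji", "fiji",
                                         "+fp32-denormals", "+fp32-denormals"));
  assert(!areInlineCompatibleConservative("fiji", "gfx900", "", ""));
  return 0;
}

Expressing the check as (CallerBits & CalleeBits) == CalleeBits keeps the default conservatively correct: a caller missing any feature the callee was built with blocks inlining, and targets whose feature bits include properties irrelevant to inlining can still override the hook.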