Index: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h +++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h @@ -35,6 +35,60 @@ const X86Subtarget *getST() const { return ST; } const X86TargetLowering *getTLI() const { return TLI; } + const FeatureBitset InlineFeatureIgnoreList = { + // This indicates the CPU is 64-bit capable, not that we are in 64-bit mode. + X86::Feature64Bit, + + // These features don't have any intrinsics or ABI effect. + X86::FeatureNOPL, + X86::FeatureCMPXCHG16B, + X86::FeatureLAHFSAHF, + + // Codegen control options. + X86::FeatureFast11ByteNOP, + X86::FeatureFast15ByteNOP, + X86::FeatureFastBEXTR, + X86::FeatureFastHorizontalOps, + X86::FeatureFastLZCNT, + X86::FeatureFastPartialYMMorZMMWrite, + X86::FeatureFastScalarFSQRT, + X86::FeatureFastSHLDRotate, + X86::FeatureFastVariableShuffle, + X86::FeatureFastVectorFSQRT, + X86::FeatureLEAForSP, + X86::FeatureLEAUsesAG, + X86::FeatureLZCNTFalseDeps, + X86::FeatureMacroFusion, + X86::FeatureMergeToThreeWayBranch, + X86::FeaturePadShortFunctions, + X86::FeaturePOPCNTFalseDeps, + X86::FeatureSSEUnalignedMem, + X86::FeatureSlow3OpsLEA, + X86::FeatureSlowDivide32, + X86::FeatureSlowDivide64, + X86::FeatureSlowIncDec, + X86::FeatureSlowLEA, + X86::FeatureSlowPMADDWD, + X86::FeatureSlowPMULLD, + X86::FeatureSlowSHLD, + X86::FeatureSlowTwoMemOps, + X86::FeatureSlowUAMem16, + + // Perf-tuning flags. + X86::FeatureHasFastGather, + X86::FeatureSlowUAMem32, + + // Based on whether the user set the -mprefer-vector-width option. + X86::FeaturePrefer256Bit, + + // CPU name enums. These just follow the CPU string. 
+ X86::ProcIntelAtom, + X86::ProcIntelGLM, + X86::ProcIntelGLP, + X86::ProcIntelSLM, + X86::ProcIntelTRM, + }; + public: explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), Index: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp +++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3065,10 +3065,9 @@ const FeatureBitset &CalleeBits = TM.getSubtargetImpl(*Callee)->getFeatureBits(); - // FIXME: This is likely too limiting as it will include subtarget features - // that we might not care about for inlining, but it is conservatively - // correct. - return (CallerBits & CalleeBits) == CalleeBits; + FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; + FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; + return (RealCallerBits & RealCalleeBits) == RealCalleeBits; } const X86TTIImpl::TTI::MemCmpExpansionOptions * Index: llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-i686.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-i686.ll +++ llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-i686.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -mtriple=i686-unknown-unknown -S -inline | FileCheck %s + +define i32 @func_target_cpu_nocona() #0 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_prescott_call_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_prescott_call_target_cpu_nocona() #1 { + %call = call i32 @func_target_cpu_nocona() + ret i32 %call +} + +attributes #0 = { nounwind "target-cpu"="nocona" } +attributes #1 = { nounwind "target-cpu"="prescott" } Index: llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll =================================================================== --- 
llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll +++ llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-unknown -S -inline | FileCheck %s + +define i32 @func_target_cpu_base() #0 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_k8_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_k8_call_target_cpu_base() #1 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_nehalem_call_target_cpu_base() #2 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_goldmont_call_target_cpu_base() #3 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +define i32 @func_target_cpu_nocona() #4 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_base_call_target_cpu_nocona() #0 { + %call = call i32 @func_target_cpu_nocona() + ret i32 %call +} + +attributes #0 = { nounwind "target-cpu"="x86-64" } +attributes #1 = { nounwind "target-cpu"="k8" } +attributes #2 = { nounwind "target-cpu"="nehalem" } +attributes #3 = { nounwind "target-cpu"="goldmont" } +attributes #4 = { nounwind "target-cpu"="nocona" "target-features"="-sse3" }