Index: lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.h
+++ lib/Target/ARM/ARMTargetTransformInfo.h
@@ -41,6 +41,12 @@
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
 
+  /// Return true if \p Callee may be inlined into \p Caller: both must use
+  /// the same ARM/Thumb mode and the callee's target-features must be a
+  /// subset of the caller's.
+  bool areInlineCompatible(const Function *Caller,
+                           const Function *Callee) const;
+
   bool enableInterleavedAccessVectorization() { return true; }
 
   /// Floating-point computation using ARMv8 AArch32 Advanced
Index: lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,6 +15,25 @@
 
 #define DEBUG_TYPE "armtti"
 
+bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
+                                     const Function *Callee) const {
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+
+  const FeatureBitset &CallerBits =
+      TM.getSubtargetImpl(*Caller)->getFeatureBits();
+  const FeatureBitset &CalleeBits =
+      TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+  // Never inline across an ARM/Thumb mode boundary.
+  if (CallerBits[ARM::ModeThumb] != CalleeBits[ARM::ModeThumb])
+    return false;
+
+  // With the modes equal, the ModeThumb bit satisfies the subset relation
+  // trivially, so no mask is needed: inline only if every target-feature of
+  // the callee is also enabled in the caller.
+  return (CallerBits & CalleeBits) == CalleeBits;
+}
+
 int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
   assert(Ty->isIntegerTy());
 
Index: test/Transforms/Inline/ARM/inline-target-attr.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/ARM/inline-target-attr.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -inline | FileCheck %s
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
+; Check that we only inline when we have compatible target attributes.
+; The ARM backend implements the areInlineCompatible TTI hook, which verifies
+; that the caller's and callee's attribute sets are compatible.
+
+define i32 @foo() #0 {
+entry:
+  %call = call i32 (...) @baz()
+  ret i32 %call
+; CHECK-LABEL: foo
+; CHECK: call i32 (...) @baz()
+}
+declare i32 @baz(...) #0
+
+define i32 @bar() #1 {
+entry:
+  %call = call i32 @foo()
+  ret i32 %call
+; CHECK-LABEL: bar
+; CHECK: call i32 (...) @baz()
+}
+
+define i32 @qux() #0 {
+entry:
+  %call = call i32 @bar()
+  ret i32 %call
+; CHECK-LABEL: qux
+; CHECK: call i32 @bar()
+}
+
+define i32 @thumb_fn() #2 {
+entry:
+  %call = call i32 @foo()
+  ret i32 %call
+; CHECK-LABEL: thumb_fn
+; CHECK: call i32 @foo
+}
+
+define i32 @strict_align() #3 {
+entry:
+  %call = call i32 @foo()
+  ret i32 %call
+; CHECK-LABEL: strict_align
+; CHECK: call i32 (...) @baz()
+}
+
+attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" }
+attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16" }
+attributes #2 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+thumb-mode" }
+attributes #3 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+strict-align" }
Index: test/Transforms/Inline/ARM/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/Inline/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True