Index: lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.h +++ lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,6 +33,39 @@ const ARMSubtarget *ST; const ARMTargetLowering *TLI; + // Currently the following features are excluded from InlineFeatureWhitelist. + // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16 + // Depending on whether they are set or unset, different + // instructions/registers are available. For example, inlining a callee with + // -thumb-mode in a caller with +thumb-mode, may cause the assembler to + // fail if the callee uses ARM only instructions, e.g. in inline asm. + const FeatureBitset InlineFeatureWhitelist = { + ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, + ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, + ARM::FeatureFullFP16, ARM::FeatureHWDivThumb, + ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, + ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, + ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, + ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS, + ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing, + ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32, + ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR, + ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits, + ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg, + ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx, + ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs, + ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign, + ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding, + ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR, + ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp, + ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor, + ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, + ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, + ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, + ARM::FeatureNoNegativeImmediates + }; + const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } @@ -41,6 +74,9 @@ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + bool enableInterleavedAccessVectorization() { return true; } /// Floating-point computation using ARMv8 AArch32 Advanced Index: lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.cpp +++ lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -15,6 +15,24 @@ #define DEBUG_TYPE "armtti" +bool ARMTTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // To inline a callee, all features not in the whitelist must match exactly. + bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) == + (CalleeBits & ~InlineFeatureWhitelist); + // For features in the whitelist, the callee's features must be a subset of + // the callers'. + bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) == + (CalleeBits & InlineFeatureWhitelist); + return MatchExact && MatchSubset; +} + int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); Index: test/Transforms/Inline/ARM/inline-target-attr.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/ARM/inline-target-attr.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -inline | FileCheck %s +; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s +; Check that we only inline when we have compatible target attributes. +; ARM has implemented a target attribute that will verify that the attribute +; sets are compatible. + +define i32 @foo() #0 { +entry: + %call = call i32 (...) @baz() + ret i32 %call +; CHECK-LABEL: foo +; CHECK: call i32 (...) @baz() +} +declare i32 @baz(...) #0 + +define i32 @bar() #1 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: bar +; CHECK: call i32 (...) @baz() +} + +define i32 @qux() #0 { +entry: + %call = call i32 @bar() + ret i32 %call +; CHECK-LABEL: qux +; CHECK: call i32 @bar() +} + +define i32 @thumb_fn() #2 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: thumb_fn +; CHECK: call i32 @foo +} + +define i32 @strict_align() #3 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: strict_align +; CHECK: call i32 (...) @baz() +} + +define i32 @soft_float_fn() #4 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: thumb_fn +; CHECK: call i32 @foo +} + +attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" } +attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16" } +attributes #2 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+thumb-mode" } +attributes #3 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+strict-align" } +attributes #4 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+soft-float" } Index: test/Transforms/Inline/ARM/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/Inline/ARM/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True