Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
@@ -934,6 +934,14 @@
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
+  /// \returns True if the caller and callee agree on how \p Args will be passed
+  /// to the callee.
+  /// \param[out] Args The list of compatible arguments. The implementation may
+  /// filter out any incompatible args from this list.
+  bool areFunctionArgsABICompatible(const Function *Caller,
+                                    const Function *Callee,
+                                    SmallPtrSetImpl<Argument *> &Args) const;
+
   /// The type of load/store indexing.
   enum MemIndexedMode {
     MIM_Unindexed, ///< No indexing.
@@ -1179,6 +1187,9 @@
       unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
   virtual bool areInlineCompatible(const Function *Caller,
                                    const Function *Callee) const = 0;
+  virtual bool
+  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
+                               SmallPtrSetImpl<Argument *> &Args) const = 0;
   virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
   virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
@@ -1557,6 +1568,11 @@
                            const Function *Callee) const override {
     return Impl.areInlineCompatible(Caller, Callee);
   }
+  bool areFunctionArgsABICompatible(
+      const Function *Caller, const Function *Callee,
+      SmallPtrSetImpl<Argument *> &Args) const override {
+    return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
+  }
   bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
     return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
   }
Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -526,6 +526,14 @@
             Callee->getFnAttribute("target-features"));
   }
 
+  bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
+                                    SmallPtrSetImpl<Argument *> &Args) const {
+    return (Caller->getFnAttribute("target-cpu") ==
+            Callee->getFnAttribute("target-cpu")) &&
+           (Caller->getFnAttribute("target-features") ==
+            Callee->getFnAttribute("target-features"));
+  }
+
   bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
     return false;
Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
@@ -625,6 +625,12 @@
   return TTIImpl->areInlineCompatible(Caller, Callee);
 }
 
+bool TargetTransformInfo::areFunctionArgsABICompatible(
+    const Function *Caller, const Function *Callee,
+    SmallPtrSetImpl<Argument *> &Args) const {
+  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
+}
+
 bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
                                              Type *Ty) const {
   return TTIImpl->isIndexedLoadLegal(Mode, Ty);
Index: llvm/trunk/lib/Transforms/IPO/ArgumentPromotion.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ llvm/trunk/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -49,6 +49,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -809,6 +810,21 @@
   return false;
 }
 
+static bool areFunctionArgsABICompatible(
+    const Function &F, const TargetTransformInfo &TTI,
+    SmallPtrSetImpl<Argument *> &ArgsToPromote,
+    SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
+  for (const Use &U : F.uses()) {
+    CallSite CS(U.getUser());
+    const Function *Caller = CS.getCaller();
+    const Function *Callee = CS.getCalledFunction();
+    if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
+        !TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform))
+      return false;
+  }
+  return true;
+}
+
 /// PromoteArguments - This method checks the specified function to see if there
 /// are any promotable arguments and if it is safe to promote the function (for
 /// example, all callers are direct). If safe to promote some arguments, it
@@ -817,7 +833,8 @@
 promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
                  unsigned MaxElements,
                  Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
-                     ReplaceCallSite) {
+                     ReplaceCallSite,
+                 const TargetTransformInfo &TTI) {
   // Don't perform argument promotion for naked functions; otherwise we can end
   // up removing parameters that are seemingly 'not used' as they are referred
   // to in the assembly.
@@ -846,7 +863,7 @@
 
   // Second check: make sure that all callers are direct callers. We can't
   // transform functions that have indirect callers. Also see if the function
-  // is self-recursive.
+  // is self-recursive and check that target features are compatible.
   bool isSelfRecursive = false;
   for (Use &U : F->uses()) {
     CallSite CS(U.getUser());
@@ -955,6 +972,10 @@
   if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
     return nullptr;
 
+  if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote,
+                                    ByValArgsToTransform))
+    return nullptr;
+
   return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
 }
 
@@ -980,7 +1001,9 @@
         return FAM.getResult<AAManager>(F);
       };
 
-      Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None);
+      const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
+      Function *NewF =
+          promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
       if (!NewF)
         continue;
       LocalChange = true;
@@ -1018,6 +1041,7 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
     getAAResultsAnalysisUsage(AU);
     CallGraphSCCPass::getAnalysisUsage(AU);
   }
@@ -1043,6 +1067,7 @@
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
                     "Promote 'by reference' arguments to scalars", false,
                     false)
@@ -1079,8 +1104,10 @@
         CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
       };
 
+      const TargetTransformInfo &TTI =
+          getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF);
       if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
-                                            {ReplaceCallSite})) {
+                                            {ReplaceCallSite}, TTI)) {
         LocalChange = true;
 
         // Update the call graph for the newly promoted function.
Index: llvm/trunk/test/Transforms/ArgumentPromotion/X86/attributes.ll
===================================================================
--- llvm/trunk/test/Transforms/ArgumentPromotion/X86/attributes.ll
+++ llvm/trunk/test/Transforms/ArgumentPromotion/X86/attributes.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -argpromotion < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+; Test that we only promote arguments when the caller/callee have compatible
+; function attributes.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1)
+define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1
+  store <4 x i64> %tmp, <4 x i64>* %arg
+  ret void
+}
+
+define void @no_promote(<4 x i64>* %arg) #1 {
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+; CHECK-LABEL: @promote_avx2(<4 x i64>* %arg, <4 x i64> %
+define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1
+  store <4 x i64> %tmp, <4 x i64>* %arg
+  ret void
+}
+
+define void @promote(<4 x i64>* %arg) #0 {
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2
+
+attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { argmemonly nounwind }
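
Note (not part of the patch): the default hook added in TargetTransformInfoImpl.h reduces to an exact string comparison of the caller's and callee's subtarget attributes and leaves the Args set untouched; a target-specific override may instead drop only the incompatible arguments from Args. A minimal standalone sketch of that default check follows, with a hypothetical helper name, assuming the usual LLVM headers are available:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Argument.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    // Hypothetical helper (illustration only) mirroring the default
    // TargetTransformInfoImplBase behaviour shown above: the caller and callee
    // are treated as ABI-compatible for the arguments in Args only if their
    // "target-cpu" and "target-features" attribute strings match exactly.
    // A target override could instead erase just the incompatible arguments
    // from Args and still return true for the rest.
    static bool subtargetAttrsMatch(const Function &Caller, const Function &Callee,
                                    SmallPtrSetImpl<Argument *> &Args) {
      (void)Args; // The default implementation does not filter Args.
      return Caller.getFnAttribute("target-cpu") ==
                 Callee.getFnAttribute("target-cpu") &&
             Caller.getFnAttribute("target-features") ==
                 Callee.getFnAttribute("target-features");
    }

In the patch itself this logic lives in TargetTransformInfoImplBase::areFunctionArgsABICompatible and is reached through the TargetTransformInfo wrapper, which is why the test above sees promotion blocked when only one side of the call carries "+avx2".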