Index: lib/Transforms/IPO/ArgumentPromotion.cpp
===================================================================
--- lib/Transforms/IPO/ArgumentPromotion.cpp
+++ lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -49,6 +49,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -808,6 +809,15 @@
   return false;
 }
 
+/// Test that there are no attribute conflicts between Caller and Callee.
+static bool functionsHaveCompatibleAttributes(const CallSite &CS,
+                                              const TargetTransformInfo &TTI) {
+  const Function *Caller = CS.getCaller();
+  const Function *Callee = CS.getCalledFunction();
+  return TTI.areInlineCompatible(Caller, Callee) &&
+         AttributeFuncs::areInlineCompatible(*Caller, *Callee);
+}
+
 /// PromoteArguments - This method checks the specified function to see if there
 /// are any promotable arguments and if it is safe to promote the function (for
 /// example, all callers are direct). If safe to promote some arguments, it
@@ -816,7 +826,7 @@
 promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
                  unsigned MaxElements,
                  Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
-                     ReplaceCallSite) {
+                     ReplaceCallSite, const TargetTransformInfo &TTI) {
   // Don't perform argument promotion for naked functions; otherwise we can end
   // up removing parameters that are seemingly 'not used' as they are referred
   // to in the assembly.
@@ -845,7 +855,7 @@
 
   // Second check: make sure that all callers are direct callers. We can't
   // transform functions that have indirect callers. Also see if the function
-  // is self-recursive.
+  // is self-recursive and check that target features are compatible.
   bool isSelfRecursive = false;
   for (Use &U : F->uses()) {
     CallSite CS(U.getUser());
@@ -857,6 +867,9 @@
     if (CS.isMustTailCall())
       return nullptr;
 
+    if (!functionsHaveCompatibleAttributes(CS, TTI))
+      return nullptr;
+
     if (CS.getInstruction()->getParent()->getParent() == F)
       isSelfRecursive = true;
   }
@@ -979,7 +992,8 @@
         return FAM.getResult<AAManager>(F);
       };
 
-      Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None);
+      const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
+      Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
       if (!NewF)
         continue;
       LocalChange = true;
@@ -1017,6 +1031,7 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
     getAAResultsAnalysisUsage(AU);
     CallGraphSCCPass::getAnalysisUsage(AU);
   }
@@ -1042,6 +1057,7 @@
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
                     "Promote 'by reference' arguments to scalars", false, false)
 
@@ -1078,8 +1094,10 @@
         CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
       };
 
+      const TargetTransformInfo &TTI =
+          getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF);
       if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
-                                            {ReplaceCallSite})) {
+                                            {ReplaceCallSite}, TTI)) {
         LocalChange = true;
 
         // Update the call graph for the newly promoted function.
Index: test/Transforms/ArgumentPromotion/X86/attributes.ll
===================================================================
--- /dev/null
+++ test/Transforms/ArgumentPromotion/X86/attributes.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -argpromotion < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+; Test that we only promote arguments when the caller/callee have compatible
+; function attributes.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1)
+define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1
+  store <4 x i64> %tmp, <4 x i64>* %arg
+  ret void
+}
+
+define void @no_promote(<4 x i64>* %arg) #1 {
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+; CHECK-LABEL: @promote_avx2(<4 x i64>* %arg, <4 x i64> %
+define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1
+  store <4 x i64> %tmp, <4 x i64>* %arg
+  ret void
+}
+
+define void @promote(<4 x i64>* %arg) #0 {
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2
+
+attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { argmemonly nounwind }