diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -133,7 +133,9 @@
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                         TTI::TargetCostKind CostKind);
-
+  bool areFunctionArgsABICompatible(const Function *Caller,
+                                    const Function *Callee,
+                                    SmallPtrSetImpl<Argument *> &Args) const;
   /// @}
 };
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1220,6 +1220,36 @@
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
+/// Extends the generic argument compatibility check with a PowerPC-specific
+/// size limit.
+///
+/// Returns false when the base implementation rejects the arguments, or when
+/// any argument in \p Args points to a sized type wider than 128 bits under
+/// the data layout of either \p Caller or \p Callee; returns true otherwise.
+bool PPCTTIImpl::areFunctionArgsABICompatible(
+    const Function *Caller, const Function *Callee,
+    SmallPtrSetImpl<Argument *> &Args) const {
+  if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+    return false;
+
+  const DataLayout &CallerDL = Caller->getParent()->getDataLayout();
+  const DataLayout &CalleeDL = Callee->getParent()->getDataLayout();
+
+  // Capture the layouts by reference: none_of runs the predicate before
+  // returning, and a by-value capture would copy both DataLayout objects.
+  return llvm::none_of(Args, [&CallerDL, &CalleeDL](Argument *A) {
+    auto *EltTy = cast<PointerType>(A->getType())->getElementType();
+    // Unsized pointees have no size to compare, so they are never rejected.
+    if (!EltTy->isSized())
+      return false;
+    // Keep the sizes 64 bits wide; narrowing to unsigned could wrap a huge
+    // type back under the limit.
+    const uint64_t CallerTypeSize = CallerDL.getTypeSizeInBits(EltTy);
+    const uint64_t CalleeTypeSize = CalleeDL.getTypeSizeInBits(EltTy);
+    return CallerTypeSize > 128 || CalleeTypeSize > 128;
+  });
+}
+
 bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                             LoopInfo *LI, DominatorTree *DT,
                             AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
diff --git a/llvm/test/CodeGen/PowerPC/arg_promotion.ll b/llvm/test/CodeGen/PowerPC/arg_promotion.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/arg_promotion.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -argpromotion -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Test to check that we do not promote arguments when the
+; type size is greater than 128 bits.
+
+define internal fastcc void @print_acc(<512 x i1>* nocapture readonly %a) nounwind {
+; CHECK-LABEL: @print_acc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, <512 x i1>* [[A:%.*]], align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 0
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <512 x i1>, <512 x i1>* %a, align 64
+  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
+  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) nounwind
+
+define dso_local void @test(<512 x i1>* nocapture %a, <16 x i8> %ac) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[AC:%.*]], <16 x i8> [[AC]])
+; CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[A:%.*]], align 64
+; CHECK-NEXT:    tail call fastcc void @print_acc(<512 x i1>* nonnull [[A]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %ac, <16 x i8> %ac)
+  store <512 x i1> %0, <512 x i1>* %a, align 64
+  tail call fastcc void @print_acc(<512 x i1>* nonnull %a)
+  ret void
+}
+
+declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>) nounwind