diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -132,7 +132,9 @@
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                         TTI::TargetCostKind CostKind);
-
+  bool areFunctionArgsABICompatible(const Function *Caller,
+                                    const Function *Callee,
+                                    SmallPtrSetImpl<Argument *> &Args) const;
   /// @}
 };
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1221,6 +1221,40 @@
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
+
+/// Decide whether the pointer arguments in \p Args may be passed between
+/// \p Caller and \p Callee without breaking the ABI.
+///
+/// Defers to the base implementation first, then additionally rejects any
+/// argument whose pointee is a sized type wider than 128 bits under either
+/// function's data layout (such as the <512 x i1> values used in
+/// arg_promotion.ll).
+///
+/// \returns true if every argument in \p Args is compatible.
+bool PPCTTIImpl::areFunctionArgsABICompatible(
+    const Function *Caller, const Function *Callee,
+    SmallPtrSetImpl<Argument *> &Args) const {
+  if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+    return false;
+
+  const DataLayout &CallerDL = Caller->getParent()->getDataLayout();
+  const DataLayout &CalleeDL = Callee->getParent()->getDataLayout();
+
+  // Capture the layouts by reference: the lambda never outlives this call, so
+  // there is no need to copy two DataLayout objects.
+  return llvm::none_of(Args, [&CallerDL, &CalleeDL](Argument *A) {
+    // cast<> asserts that every candidate argument is pointer-typed.
+    auto *EltTy = cast<PointerType>(A->getType())->getElementType();
+    if (!EltTy->isSized())
+      return false;
+    // Keep the sizes 64 bits wide so that very large types cannot wrap around
+    // to a small value and slip past the check.
+    uint64_t CallerTypeSize = CallerDL.getTypeSizeInBits(EltTy);
+    uint64_t CalleeTypeSize = CalleeDL.getTypeSizeInBits(EltTy);
+    return CallerTypeSize > 128 || CalleeTypeSize > 128;
+  });
+}
+
 bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                             LoopInfo *LI, DominatorTree *DT,
                             AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
diff --git a/llvm/test/CodeGen/PowerPC/arg_promotion.ll b/llvm/test/CodeGen/PowerPC/arg_promotion.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/arg_promotion.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+define dso_local void @test(<512 x i1>* nocapture %a, <16 x i8> %ac) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvf32ger acc0, v2, v2
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r3)
+; CHECK-NEXT:    stxv vs1, 32(r3)
+; CHECK-NEXT:    stxv vs2, 16(r3)
+; CHECK-NEXT:    stxv vs3, 0(r3)
+; CHECK-NEXT:    b print_acc@notoc
+; CHECK-NEXT:    #TC_RETURNd8 print_acc@notoc 0
+entry:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %ac, <16 x i8> %ac)
+  store <512 x i1> %0, <512 x i1>* %a, align 64
+  tail call fastcc void @print_acc(<512 x i1>* nonnull %a)
+  ret void
+}
+
+declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>) nounwind
+
+define internal fastcc void @print_acc(<512 x i1>* nocapture readonly %a) nounwind {
+; CHECK-LABEL: print_acc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <512 x i1>, <512 x i1>* %a, align 64
+  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
+  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) nounwind