diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -129,7 +129,10 @@
 public:
+  /// When \p TypeBasedOnly is true the call's argument values are not
+  /// recorded, so the cost is computed from the types alone.
   IntrinsicCostAttributes(
       Intrinsic::ID Id, const CallBase &CI,
-      InstructionCost ScalarCost = InstructionCost::getInvalid());
+      InstructionCost ScalarCost = InstructionCost::getInvalid(),
+      bool TypeBasedOnly = false);
 
   IntrinsicCostAttributes(
       Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1778,6 +1778,33 @@
       return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
                                             CostKind);
     }
+
+    case Intrinsic::masked_scatter: {
+      // For a type-based cost, assume the worst case: a variable mask.
+      bool VarMask = true;
+      Type *Ty = Tys[0];
+      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
+      // The arguments below are not actually used by
+      // getGatherScatterOpCost.
+      const Value *Ptr = nullptr;
+      const Instruction *I = nullptr;
+      return thisT()->getGatherScatterOpCost(Instruction::Store, Ty, Ptr,
+                                             VarMask, TyAlign, CostKind, I);
+    }
+
+    case Intrinsic::masked_gather: {
+      // For a type-based cost, assume the worst case: a variable mask.
+      bool VarMask = true;
+      Type *Ty = RetTy;
+      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
+      // The arguments below are not actually used by
+      // getGatherScatterOpCost.
+      const Value *Ptr = nullptr;
+      const Instruction *I = nullptr;
+      return thisT()->getGatherScatterOpCost(Instruction::Load, Ty, Ptr,
+                                             VarMask, TyAlign, CostKind, I);
+    }
+
     case Intrinsic::vector_reduce_add:
       return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
                                                  None, CostKind);
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/IntrinsicInst.h"
 using namespace llvm;
 
 static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
@@ -39,6 +40,9 @@
                clEnumValN(TargetTransformInfo::TCK_SizeAndLatency,
                           "size-latency", "Code size and latency")));
 
+static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost",
+    cl::desc("Calculate intrinsics cost based only on argument types"),
+    cl::init(false));
 #define CM_NAME "cost-model"
 #define DEBUG_TYPE CM_NAME
 
@@ -103,7 +107,16 @@
 
   for (BasicBlock &B : *F) {
     for (Instruction &Inst : B) {
-      InstructionCost Cost = TTI->getInstructionCost(&Inst, CostKind);
+      InstructionCost Cost;
+      auto *II = dyn_cast<IntrinsicInst>(&Inst);
+      if (TypeBasedIntrinsicCost && II) {
+        IntrinsicCostAttributes ICA(
+            II->getIntrinsicID(), *II, InstructionCost::getInvalid(),
+            /*TypeBasedOnly=*/true);
+        Cost = TTI->getIntrinsicInstrCost(ICA, CostKind);
+      } else {
+        Cost = TTI->getInstructionCost(&Inst, CostKind);
+      }
       if (auto CostVal = Cost.getValue())
         OS << "Cost Model: Found an estimated cost of " << *CostVal;
       else
@@ -122,7 +135,16 @@
     for (Instruction &Inst : B) {
       // TODO: Use a pass parameter instead of cl::opt CostKind to determine
       // which cost kind to print.
-      InstructionCost Cost = TTI.getInstructionCost(&Inst, CostKind);
+      InstructionCost Cost;
+      auto *II = dyn_cast<IntrinsicInst>(&Inst);
+      if (TypeBasedIntrinsicCost && II) {
+        IntrinsicCostAttributes ICA(
+            II->getIntrinsicID(), *II, InstructionCost::getInvalid(),
+            /*TypeBasedOnly=*/true);
+        Cost = TTI.getIntrinsicInstrCost(ICA, CostKind);
+      } else {
+        Cost = TTI.getInstructionCost(&Inst, CostKind);
+      }
       if (auto CostVal = Cost.getValue())
         OS << "Cost Model: Found an estimated cost of " << *CostVal;
       else
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -58,14 +58,17 @@
 }
 
 IntrinsicCostAttributes::IntrinsicCostAttributes(
-    Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost)
+    Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost,
+    bool TypeBasedOnly)
     : II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
       ScalarizationCost(ScalarizationCost) {
 
   if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
     FMF = FPMO->getFastMathFlags();
 
-  Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
+  // In type-based-only mode the argument values are deliberately left out.
+  if (!TypeBasedOnly)
+    Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
   FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
   ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s
+; RUN: opt < %s -passes='print<cost-model>' 2>&1 -type-based-intrinsic-cost -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=TYPE_BASED_ONLY
 
 define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %v1, <16 x i32> %v2) {
 ; CHECK-LABEL: 'vector_insert_extract'
@@ -500,6 +501,66 @@
   ret void
 }
 
+define <vscale x 4 x i32> @masked_gather_nxv4i32(<vscale x 4 x i32*> %ld, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru) {
+; TYPE_BASED_ONLY-LABEL: 'masked_gather_nxv4i32'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 8 x i32> @masked_gather_nxv8i32(<vscale x 8 x i32*> %ld, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru) {
+; TYPE_BASED_ONLY-LABEL: 'masked_gather_nxv8i32'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ld, i32 0, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
+  %res = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> %ld, i32 0, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
+  ret <vscale x 8 x i32> %res
+}
+
+define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
+; TYPE_BASED_ONLY-LABEL: 'masked_gather_v4i32'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+  ret <4 x i32> %res
+}
+
+define <1 x i128> @masked_gather_v1i128(<1 x i128*> %ld, <1 x i1> %masks, <1 x i128> %passthru) {
+; TYPE_BASED_ONLY-LABEL: 'masked_gather_v1i128'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+  %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+  ret <1 x i128> %res
+}
+
+define void @masked_scatter_nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %masks) {
+; TYPE_BASED_ONLY-LABEL: 'masked_scatter_nxv4i32'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
+
+  call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %masks) {
+; TYPE_BASED_ONLY-LABEL: 'masked_scatter_nxv8i32'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, i32 0, <vscale x 8 x i1> %masks)
+
+  call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, i32 0, <vscale x 8 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_v4i32(<4 x i32> %data, <4 x i32*> %ptrs, <4 x i1> %masks) {
+; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v4i32'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
+
+  call void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_v1i128(<1 x i128> %data, <1 x i128*> %ptrs, <1 x i1> %masks) {
+; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v1i128'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> %ptrs, i32 0, <1 x i1> %masks)
+
+  call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> %ptrs, i32 0, <1 x i1> %masks)
+  ret void
+}
+
 declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
 declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
@@ -528,5 +589,13 @@
 declare <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> %ptrs, i32 %align, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthru)
+declare <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*>, i32, <1 x i1>, <1 x i128>)
+declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks)
+declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, i32 %align, <vscale x 8 x i1> %masks)
+declare void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 %align, <4 x i1> %masks)
+declare void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> %ptrs, i32 %align, <1 x i1> %masks)
 
 attributes #0 = { "target-features"="+sve,+bf16" }