diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -114,12 +114,14 @@ /// Estimate a cost of subvector extraction as a sequence of extract and /// insert operations. - unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index, + unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index, FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only extract subvectors from vectors"); int NumSubElts = SubVTy->getNumElements(); - assert((Index + NumSubElts) <= (int)VTy->getNumElements() && + assert((!isa<FixedVectorType>(VTy) || + (Index + NumSubElts) <= + (int)cast<FixedVectorType>(VTy)->getNumElements()) && "SK_ExtractSubvector index out of range"); unsigned Cost = 0; @@ -137,12 +139,14 @@ /// Estimate a cost of subvector insertion as a sequence of extract and /// insert operations. - unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index, + unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index, FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only insert subvectors into vectors"); int NumSubElts = SubVTy->getNumElements(); - assert((Index + NumSubElts) <= (int)VTy->getNumElements() && + assert((!isa<FixedVectorType>(VTy) || + (Index + NumSubElts) <= + (int)cast<FixedVectorType>(VTy)->getNumElements()) && "SK_InsertSubvector index out of range"); unsigned Cost = 0; @@ -723,10 +727,10 @@ case TTI::SK_PermuteTwoSrc: return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp)); case TTI::SK_ExtractSubvector: - return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index, + return getExtractSubvectorOverhead(Tp, Index, cast<FixedVectorType>(SubTp)); case TTI::SK_InsertSubvector: - return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index, + return getInsertSubvectorOverhead(Tp, Index, cast<FixedVectorType>(SubTp)); } llvm_unreachable("Unknown TTI::ShuffleKind"); @@ -1255,6 +1259,26 @@ return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I); } + case Intrinsic::experimental_vector_extract: { + // FIXME: Handle case where a scalable vector is extracted from a scalable + // vector + if (isa<ScalableVectorType>(RetTy)) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue(); + return thisT()->getShuffleCost(TTI::SK_ExtractSubvector, + cast<VectorType>(Args[0]->getType()), + Index, cast<VectorType>(RetTy)); + } + case Intrinsic::experimental_vector_insert: { + // FIXME: Handle case where a scalable vector is inserted into a scalable + // vector + if (isa<ScalableVectorType>(Args[1]->getType())) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue(); + return thisT()->getShuffleCost( + TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index, + cast<VectorType>(Args[1]->getType())); + } case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: case Intrinsic::vector_reduce_and: diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s + +define <16 x i32> @extract_cost(<vscale x 4 x i32> %vec) { +; CHECK-LABEL: 'extract_cost' +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ret + + %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0) + ret <16 x i32> %ret +} + +define <vscale x 4 x i32> @insert_cost(<vscale x 4 x i32> %vec, <16 x i32> %subVec) { +; CHECK-LABEL: 'insert_cost' +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret + + %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0) + ret <vscale x 4 x i32> %ret +} + +define <vscale x 4 x i32> @extract_cost_scalable(<vscale x 16 x i32> %vec) { +; CHECK-LABEL: 'extract_cost_scalable' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret + + %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0) + ret <vscale x 4 x i32> %ret +} + +define <vscale x 16 x i32> @insert_cost_scalable(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec) { +; CHECK-LABEL: 'insert_cost_scalable' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 16 x i32> %ret + + %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0) + ret <vscale x 16 x i32> %ret +} + +declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64) +declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64) +declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64) +declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64)