Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -123,6 +123,9 @@ return ST->useRVVForFixedLengthVectors() ? 16 : 0; } + InstructionCost getVRGatherVVCost(MVT VT, bool Masked, LLVMContext &C, + TTI::TargetCostKind CostKind); + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -259,6 +259,23 @@ return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C)); } +/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv +/// is generally quadratic in the number of vreg implied by LMUL, and for a +/// generic shuffle and (optional) generic mask we have to account for the cost +/// of materializing those as constants in a vreg. +InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT, bool Masked, + LLVMContext &C, + TTI::TargetCostKind CostKind) { + VectorType *IdxTy = getVRGatherIndexType(VT, *ST, C); + InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); + InstructionCost MaskCost = 0; + if (Masked) { + auto EC = VT.getVectorElementCount(); + VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC); + MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); + } + return IndexCost + getLMULCost(VT) * getLMULCost(VT) + MaskCost; +} InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, @@ -304,11 +321,9 @@ // We model this for an unknown mask with a single vrgather. 
if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 || - LT.second.getVectorNumElements() <= 256)) { - VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext()); - InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); - return IndexCost + getLMULCost(LT.second); - } + LT.second.getVectorNumElements() <= 256)) + return getVRGatherVVCost(LT.second, false, Tp->getContext(), + CostKind); } break; } @@ -321,13 +336,10 @@ if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 || LT.second.getVectorNumElements() <= 256)) { - auto &C = Tp->getContext(); - auto EC = Tp->getElementCount(); - VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C); - VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC); - InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); - InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); - return 2 * IndexCost + 2 * getLMULCost(LT.second) + MaskCost; + InstructionCost Cost = + getVRGatherVVCost(LT.second, false, Tp->getContext(), CostKind) + + getVRGatherVVCost(LT.second, true, Tp->getContext(), CostKind); + return Cost; } } break; Index: llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll =================================================================== --- llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll +++ llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll @@ -40,12 +40,12 @@ define <8 x i64> @interleave2_v8i64(<4 x i64> %v0, <4 x i64> %v1) { ; RV32-LABEL: 'interleave2_v8i64' ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret <8 x i64> %res ; ; RV64-LABEL: 'interleave2_v8i64' ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res ; %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> Index: llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll =================================================================== --- llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll +++ llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll @@ -12,16 +12,16 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64 = shufflevector <4 
x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> @@ -59,27 +59,27 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: 
%v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4i64 = 
shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2half = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4half = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8half = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2float = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4float = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 11 for instruction: %v2double = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32>