diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -302,6 +302,26 @@ return IndexCost + getLMULCost(LT.second); } } + break; + } + case TTI::SK_PermuteTwoSrc: { + if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) { + // 2 x (vrgather + cost of generating the mask constant) + cost of mask + // register for the second vrgather. We model this for an unknown + // (shuffle) mask. + if (LT.first == 1 && + (LT.second.getScalarSizeInBits() != 8 || + LT.second.getVectorNumElements() <= 256)) { + auto &C = Tp->getContext(); + auto EC = Tp->getElementCount(); + VectorType *IdxTy = VectorType::get(IntegerType::getInt8Ty(C), EC); + VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC); + InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); + InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); + return 2 * IndexCost + 2 * getLMULCost(LT.second) + MaskCost; + } + } + break; } } }; diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll @@ -52,34 +52,34 @@ define void @general_permute_two_source() { ; ; CHECK-LABEL: 'general_permute_two_source' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2half = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4half = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8half = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2float = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4float = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2double = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2half = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4half = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8half = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2float = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4float = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2double = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32>