diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -333,7 +333,12 @@ // TODO: Multiplying by LT.first implies this legalizes into multiple copies // of similar code, but I think we expand through memory. return 2 * LT.first * getLMULCost(LT.second); - case TTI::SK_Reverse: + case TTI::SK_Reverse: { + // TODO: Cases to improve here: + // * LMUL > 1 + // * i64 on RV32 + // * i1 vector + // Most of the cost here is producing the vrgather index register // Example sequence: // csrr a0, vlenb @@ -343,10 +348,15 @@ // vid.v v9 // vrsub.vx v10, v9, a0 // vrgather.vv v9, v8, v10 + unsigned LenCost = 3; + if (LT.second.isFixedLengthVector()) + // vrsub.vi has a 5 bit immediate field, otherwise an li suffices + LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1; if (Tp->getElementType()->isIntegerTy(1)) // Mask operation additionally required extend and truncate - return LT.first * 9; - return LT.first * 6; + return LT.first * (LenCost + 6); + return LT.first * (LenCost + 3); + } } return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); } diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll @@ -8,28 +8,28 @@ define void @reverse() { ; ; CHECK-LABEL: 'reverse' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32>