diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -250,38 +250,9 @@ ArrayRef Args) { std::pair LT = getTypeLegalizationCost(Tp); - if (isa(Tp)) { - switch (Kind) { - default: - // Fallthrough to generic handling. - // TODO: Most of these cases will return getInvalid in generic code, and - // must be implemented here. - break; - case TTI::SK_Broadcast: { - return LT.first * 1; - } - case TTI::SK_Splice: - // vslidedown+vslideup. - // TODO: Multiplying by LT.first implies this legalizes into multiple copies - // of similar code, but I think we expand through memory. - return 2 * LT.first * getLMULCost(LT.second); - case TTI::SK_Reverse: - // Most of the cost here is producing the vrgather index register - // Example sequence: - // csrr a0, vlenb - // srli a0, a0, 3 - // addi a0, a0, -1 - // vsetvli a1, zero, e8, mf8, ta, mu (ignored) - // vid.v v9 - // vrsub.vx v10, v9, a0 - // vrgather.vv v9, v8, v10 - if (Tp->getElementType()->isIntegerTy(1)) - // Mask operation additionally required extend and truncate - return LT.first * 9; - return LT.first * 6; - } - } - + // First, handle cases where having a fixed length vector enables us to + // give a more accurate cost than falling back to generic scalable codegen. + // TODO: Each of these cases hints at a modeling gap around scalable vectors. if (isa(Tp)) { switch (Kind) { default: @@ -350,6 +321,36 @@ } }; + // Handle scalable vectors (and fixed vectors legalized to scalable vectors). + switch (Kind) { + default: + // Fallthrough to generic handling. + // TODO: Most of these cases will return getInvalid in generic code, and + // must be implemented here. + break; + case TTI::SK_Broadcast: { + return LT.first * 1; + } + case TTI::SK_Splice: + // vslidedown+vslideup. + // TODO: Multiplying by LT.first implies this legalizes into multiple copies + // of similar code, but I think we expand through memory. + return 2 * LT.first * getLMULCost(LT.second); + case TTI::SK_Reverse: + // Most of the cost here is producing the vrgather index register + // Example sequence: + // csrr a0, vlenb + // srli a0, a0, 3 + // addi a0, a0, -1 + // vsetvli a1, zero, e8, mf8, ta, mu (ignored) + // vid.v v9 + // vrsub.vx v10, v9, a0 + // vrgather.vv v9, v8, v10 + if (Tp->getElementType()->isIntegerTy(1)) + // Mask operation additionally required extend and truncate + return LT.first * 9; + return LT.first * 6; + } return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); } diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll @@ -9,27 +9,27 @@ ; ; CHECK-LABEL: 'reverse' ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32>