diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -8,7 +8,9 @@
 
 #include "RISCVTargetTransformInfo.h"
 #include "MCTargetDesc/RISCVMatInt.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -323,6 +325,39 @@
     return LT.first * getLMULCost(LT.second);
   }
 
+  // Cost 2-way interleave and deinterleave shuffles of fixed-length vectors
+  // directly; everything else falls through to the generic implementation.
+  if (isa<FixedVectorType>(Tp) && Kind == TTI::SK_PermuteSingleSrc &&
+      Mask.size() >= 2) {
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+    if (LT.second.isFixedLengthVector()) {
+      MVT EltTp = LT.second.getVectorElementType();
+      // If the size of the element is < ELEN then shuffles of interleaves and
+      // deinterleaves of 2 vectors can be lowered into the following sequences
+      if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
+        auto InterleaveMask = createInterleaveMask(Mask.size() / 2, 2);
+        // Example sequence:
+        //   vsetivli   zero, 4, e8, mf4, ta, ma (ignored)
+        //   vwaddu.vv  v10, v8, v9
+        //   li         a0, -1                   (ignored)
+        //   vwmaccu.vx v10, a0, v9
+        if (equal(InterleaveMask, Mask))
+          return 2 * LT.first * getLMULCost(LT.second);
+
+        // Only stride-2 masks that start at element 0 or 1 are deinterleaves
+        // of 2 vectors. Testing the start first also avoids handing
+        // createStrideMask a negative start should Mask[0] be negative.
+        if (Mask[0] == 0 || Mask[0] == 1) {
+          auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
+          // Example sequence:
+          //   vnsrl.wi v10, v8, 0
+          if (equal(DeinterleaveMask, Mask))
+            return LT.first * getLMULCost(LT.second);
+        }
+      }
+    }
+  }
+
   return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
@@ -4,7 +4,7 @@
 define <8 x i8> @interleave2_v8i8(<4 x i8> %v0, <4 x i8> %v1) {
 ; CHECK-LABEL: 'interleave2_v8i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %concat = shufflevector <4 x i8> %v0, <4 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %res = shufflevector <8 x i8> %concat, <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = shufflevector <8 x i8> %concat, <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %res
 ;
   %concat = shufflevector <4 x i8> %v0, <4 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -15,7 +15,7 @@
 define <8 x i32> @interleave2_v8i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: 'interleave2_v8i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %concat = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %res = shufflevector <8 x i32> %concat, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = shufflevector <8 x i32> %concat, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %res
 ;
   %concat = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>