diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -180,6 +180,12 @@
   unsigned getMaxInterleaveFactor(unsigned VF) {
     return ST->getMaxInterleaveFactor();
   }
+
+  /// Cost of a shuffle of kind \p Kind on \p Tp. Only TTI::SK_Broadcast is
+  /// costed by this target; all other cases defer to BaseT::getShuffleCost.
+  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+                                 ArrayRef<int> Mask, int Index,
+                                 VectorType *SubTp);
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -162,3 +162,28 @@
       getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, CostKind, I);
   return NumLoads * MemOpCost;
 }
+
+/// Return the cost of a shuffle of kind \p Kind applied to vector type \p Tp.
+///
+/// Defers to BaseT::getShuffleCost when the subtarget has no V extension, or
+/// when the element type is half/float/double and the matching Zfh/F/D
+/// extension is absent. Otherwise only TTI::SK_Broadcast is costed here, as
+/// the .first member of TLI->getTypeLegalizationCost(DL, Tp); every other
+/// kind also defers to BaseT.
+InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
+                                             VectorType *Tp, ArrayRef<int> Mask,
+                                             int Index, VectorType *SubTp) {
+  if (!ST->hasStdExtV() ||
+      (Tp->getElementType()->isHalfTy() && !ST->hasStdExtZfh()) ||
+      (Tp->getElementType()->isFloatTy() && !ST->hasStdExtF()) ||
+      (Tp->getElementType()->isDoubleTy() && !ST->hasStdExtD()))
+    return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
+
+  // Let the mask refine the kind before dispatching on it.
+  Kind = improveShuffleKindFromMask(Kind, Mask);
+  // TODO: Handle other TTI::ShuffleKind kinds.
+  if (Kind == TTI::SK_Broadcast)
+    return TLI->getTypeLegalizationCost(DL, Tp).first;
+
+  return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
+}
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv64--linux-gnu -mattr=+experimental-v,+f -cost-model -analyze | FileCheck %s
+
+define void @broadcast() {
+; CHECK-LABEL: 'broadcast'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %v7 = shufflevector <2 x i8> undef, <2 x i8>undef, <2 x i32> zeroinitializer
+  %v8 = shufflevector <4 x i8> undef, <4 x i8>undef, <4 x i32> zeroinitializer
+  %v9 = shufflevector <8 x i8> undef, <8 x i8>undef, <8 x i32> zeroinitializer
+  %v10 = shufflevector <16 x i8> undef, <16 x i8>undef, <16 x i32> zeroinitializer
+
+  %v11 = shufflevector <2 x i16> undef, <2 x i16>undef, <2 x i32> zeroinitializer
+  %v12 = shufflevector <4 x i16> undef, <4 x i16>undef, <4 x i32> zeroinitializer
+  %v13 = shufflevector <8 x i16> undef, <8 x i16>undef, <8 x i32> zeroinitializer
+
+  %v14 = shufflevector <2 x i32> undef, <2 x i32>undef, <2 x i32> zeroinitializer
+  %v15 = shufflevector <4 x i32> undef, <4 x i32>undef, <4 x i32> zeroinitializer
+
+  %v16 = shufflevector <2 x float> undef, <2 x float>undef, <2 x i32> zeroinitializer
+  %v17 = shufflevector <4 x float> undef, <4 x float>undef, <4 x i32> zeroinitializer
+
+  ret void
+}