diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -84,13 +84,13 @@ /// Estimate a cost of Broadcast as an extract and sequence of insert /// operations. - InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) { + InstructionCost getBroadcastShuffleOverhead(VectorType *VTy) { InstructionCost Cost = 0; // Broadcast cost is equal to the cost of extracting the zero'th element // plus the cost of inserting it into every element of the result vector. Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0); - for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { + for (int i = 0, e = VTy->getElementCount().getKnownMinValue(); i < e; ++i) { Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); } return Cost; @@ -875,7 +875,7 @@ switch (improveShuffleKindFromMask(Kind, Mask)) { case TTI::SK_Broadcast: - return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp)); + return getBroadcastShuffleOverhead(Tp); case TTI::SK_Select: case TTI::SK_Splice: case TTI::SK_Reverse: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vectorizer-shufflecost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vectorizer-shufflecost.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vectorizer-shufflecost.ll @@ -0,0 +1,42 @@ +; REQUIRES: asserts +; RUN: opt -mtriple=riscv64 -mattr=+m,+experimental-v -loop-vectorize \ +; RUN: -scalable-vectorization=on -S < %s 2>&1 | FileCheck %s + +@a = global i32 0, align 4 +@b = global i32* null, align 8 + +; CHECK: +define void @test(i32 signext %d) { +entry: + %call = tail call noalias nonnull dereferenceable(4) i8* @_Znwm(i64 4) + %c = bitcast i8* %call to i32* + %cmp12 = icmp sgt i32 %d, 0 + %0 = load i32, i32* @a, align 4 + %tobool.not14 = icmp eq i32 %0, 0 + br i1 %tobool.not14, label %for.end4, label 
%for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + %wide.trip.count = zext i32 %d to i64 + %1 = load i32*, i32** @b, align 8 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.backedge, %for.cond1.preheader.preheader + br i1 %cmp12, label %for.body2, label %for.cond1.preheader.backedge + +for.cond1.preheader.backedge: ; preds = %for.body2, %for.cond1.preheader + br label %for.cond1.preheader + +for.body2: ; preds = %for.cond1.preheader, %for.body2 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body2 ], [ 0, %for.cond1.preheader ] + %2 = load i32, i32* %1, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv + store i32 %2, i32* %arrayidx3, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond1.preheader.backedge, label %for.body2 + +for.end4: ; preds = %entry + ret void +} + +declare nonnull i8* @_Znwm(i64)