diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -62,8 +62,9 @@ /// Return the cost of materializing an immediate for a value operand of /// a store instruction. - InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, - TTI::TargetCostKind CostKind); + InstructionCost getConstantMaterializationCost(Type *VecTy, + TTI::OperandValueInfo OpInfo, + TTI::TargetCostKind CostKind); InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); @@ -166,6 +167,10 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef<TTI::OperandValueInfo> OpInfos, + const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1287,9 +1287,8 @@ getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind); } -InstructionCost RISCVTTIImpl::getStoreImmCost(Type *Ty, - TTI::OperandValueInfo OpInfo, - TTI::TargetCostKind CostKind) { +InstructionCost RISCVTTIImpl::getConstantMaterializationCost( + Type *Ty, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) { assert(OpInfo.isConstant() && "non constant operand?"); if (!isa<VectorType>(Ty)) // FIXME: We need to account for immediate materialization here, but doing @@ -1306,7 +1305,6 @@ return getConstantPoolLoadCost(Ty, CostKind); } - InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, @@ -1321,7 +1319,7 @@ InstructionCost Cost = 0; if (Opcode == Instruction::Store && 
OpInfo.isConstant()) - Cost += getStoreImmCost(Src, OpInfo, CostKind); + Cost += getConstantMaterializationCost(Src, OpInfo, CostKind); InstructionCost BaseCost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, OpInfo, I); @@ -1418,6 +1416,24 @@ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } +InstructionCost +RISCVTTIImpl::getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef<TTI::OperandValueInfo> OpInfos, + const Instruction *I) { + // TODO: Have getConstantMaterializationCost compute the cost for scalars. + if (CostKind == TTI::TCK_RecipThroughput && Ty->isVectorTy()) { + InstructionCost Cost = 0; + // The throughput cost is the cost of one of the possible incoming values, + // so conservatively choose the most expensive constant among them. + for (TTI::OperandValueInfo OpInfo : OpInfos) + if (OpInfo.isConstant()) + Cost = std::max(Cost, + getConstantMaterializationCost(Ty, OpInfo, CostKind)); + return Cost; + } + return BaseT::getPHICost(Ty, CostKind, OpInfos, I); +} + InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll --- a/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll @@ -6,7 +6,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x = phi <2 x i8> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = phi <2 x i8> [ , %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i8> %x ; br i1 %c, label %a, label %b @@ 
-42,7 +42,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %x ; br i1 %c, label %a, label %b @@ -60,7 +60,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %x ; br i1 %c, label %a, label %b @@ -79,7 +79,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %x ; br i1 %c, label %a, label %b diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -159,26 +159,33 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 8128 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 8064 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <8 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x ptr> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <8 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x ptr> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x ptr> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP3]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> ) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 64 -; CHECK-NEXT: [[TMP4:%.*]] = 
icmp eq i64 [[INDEX_NEXT]], 1016 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC3]], +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP6]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP7]], <8 x ptr> [[TMP1]], i32 4, <8 x i1> ) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 128 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1008 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1016, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1008, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: