diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -365,6 +365,9 @@ /// Return true if the (vector) instruction I will be lowered to an instruction /// with a scalar splat operand for the given Operand number. bool canSplatOperand(Instruction *I, int Operand) const; + /// Return true if a vector instruction will lower to a target instruction + /// able to splat the given operand. + bool canSplatOperand(unsigned Opcode, int Operand) const; bool shouldSinkOperands(Instruction *I, SmallVectorImpl<Use *> &Ops) const override; bool shouldScalarizeBinop(SDValue VecOp) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1283,11 +1283,8 @@ return !XC; } -bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { - if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) - return false; - - switch (I->getOpcode()) { +bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { + switch (Opcode) { case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -1309,38 +1306,48 @@ case Instruction::URem: case Instruction::SRem: return Operand == 1; - case Instruction::Call: - if (auto *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - case Intrinsic::fma: - case Intrinsic::vp_fma: - return Operand == 0 || Operand == 1; - case Intrinsic::vp_shl: - case Intrinsic::vp_lshr: - case Intrinsic::vp_ashr: - case Intrinsic::vp_udiv: - case Intrinsic::vp_sdiv: - case Intrinsic::vp_urem: - case Intrinsic::vp_srem: - return Operand == 1; - // These intrinsics are commutative. 
- case Intrinsic::vp_add: - case Intrinsic::vp_mul: - case Intrinsic::vp_and: - case Intrinsic::vp_or: - case Intrinsic::vp_xor: - case Intrinsic::vp_fadd: - case Intrinsic::vp_fmul: - // These intrinsics have 'vr' versions. - case Intrinsic::vp_sub: - case Intrinsic::vp_fsub: - case Intrinsic::vp_fdiv: - return Operand == 0 || Operand == 1; - default: - return false; - } - } + default: return false; + } +} + + +bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { + if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) + return false; + + if (canSplatOperand(I->getOpcode(), Operand)) + return true; + + auto *II = dyn_cast<IntrinsicInst>(I); + if (!II) + return false; + + switch (II->getIntrinsicID()) { + case Intrinsic::fma: + case Intrinsic::vp_fma: + return Operand == 0 || Operand == 1; + case Intrinsic::vp_shl: + case Intrinsic::vp_lshr: + case Intrinsic::vp_ashr: + case Intrinsic::vp_udiv: + case Intrinsic::vp_sdiv: + case Intrinsic::vp_urem: + case Intrinsic::vp_srem: + return Operand == 1; + // These intrinsics are commutative. + case Intrinsic::vp_add: + case Intrinsic::vp_mul: + case Intrinsic::vp_and: + case Intrinsic::vp_or: + case Intrinsic::vp_xor: + case Intrinsic::vp_fadd: + case Intrinsic::vp_fmul: + // These intrinsics have 'vr' versions. 
+ case Intrinsic::vp_sub: + case Intrinsic::vp_fsub: + case Intrinsic::vp_fdiv: + return Operand == 0 || Operand == 1; default: return false; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1079,6 +1079,31 @@ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Args, CxtI); + + auto getConstantMatCost = + [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost { + if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand)) + // Two sub-cases: + // * Has a 5 bit immediate operand which can be splatted. + // * Has a larger immediate which must be materialized in a scalar register. + // We return 0 for both as we currently ignore the cost of materializing + // scalar constants in GPRs. + return 0; + + // Add a cost of address generation + the cost of the vector load. The + // address is expected to be a PC relative offset to a constant pool entry + // using auipc/addi. + return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty), + /*AddressSpace=*/0, CostKind); + }; + + // Add the cost of materializing any constant vectors required. + InstructionCost ConstantMatCost = 0; + if (Op1Info.isConstant()) + ConstantMatCost += getConstantMatCost(0, Op1Info); + if (Op2Info.isConstant()) + ConstantMatCost += getConstantMatCost(1, Op2Info); + switch (TLI->InstructionOpcodeToISD(Opcode)) { case ISD::ADD: case ISD::SUB: @@ -1095,13 +1120,12 @@ case ISD::FSUB: case ISD::FMUL: case ISD::FNEG: { - // TODO: Add the cost of materializing any constant vectors required since - // we otherwise treat constants as no-cost. // TODO: We should be accounting for LMUL and scaling costs for LMUL > 1. 
- return LT.first * 1; + return ConstantMatCost + LT.first * 1; } default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, + return ConstantMatCost + + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Args, CxtI); } } diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll --- a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll @@ -815,14 +815,14 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> , undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> , undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = add 
<4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = add <4 x i32> , undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ;