diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -678,6 +678,20 @@
     {Intrinsic::ctpop, MVT::nxv8i64, 21},
 };
 
+/// Translate a VP intrinsic ID into an ISD opcode. Each
+/// HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) expansion from VPIntrinsics.def becomes
+/// a case returning ISD::VPSD; any other ID yields ISD::DELETED_NODE.
+static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) {
+  switch (ID) {
+#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
+  case Intrinsic::VPID:                                                        \
+    return ISD::VPSD;
+#include "llvm/IR/VPIntrinsics.def"
+#undef HELPER_MAP_VPID_TO_VPSD
+  }
+  return ISD::DELETED_NODE;
+}
+
 InstructionCost
 RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                     TTI::TargetCostKind CostKind) {
@@ -736,6 +750,20 @@
       return Cost * LT.first;
     break;
   }
+  case Intrinsic::vp_ceil:
+  case Intrinsic::vp_floor:
+  case Intrinsic::vp_round:
+  case Intrinsic::vp_roundeven:
+  case Intrinsic::vp_roundtozero: {
+    // Rounding with static rounding mode needs two more instructions to
+    // swap/write FRM than vp_rint.
+    unsigned Cost = 7;
+    auto LT = getTypeLegalizationCost(RetTy);
+    unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
+    if (TLI->isOperationCustom(VPISD, LT.second))
+      return Cost * LT.first;
+    break;
+  }
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -316,6 +316,211 @@
   ret void
 }
 
+define void @vp_ceil() {
+; CHECK-LABEL: 'vp_ceil'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @vp_floor() {
+; CHECK-LABEL: 'vp_floor'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @vp_round() {
+; CHECK-LABEL: 'vp_round'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @vp_roundeven() {
+; CHECK-LABEL: 'vp_roundeven'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @vp_roundtozero() {
+; CHECK-LABEL: 'vp_roundtozero'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.roundtozero.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.roundtozero.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
 define void @vp_rint() {
 ; CHECK-LABEL: 'vp_rint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %1 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
@@ -538,6 +743,101 @@
 declare <vscale x 4 x double> @llvm.roundeven.nvx5f64(<vscale x 4 x double>)
 declare <vscale x 8 x double> @llvm.roundeven.nvx8f64(<vscale x 8 x double>)
 
+declare <2 x float> @llvm.vp.ceil.v2f32(<2 x float>, <2 x i1>, i32)
+declare <4 x float> @llvm.vp.ceil.v4f32(<4 x float>, <4 x i1>, i32)
+declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, i32)
+declare <16 x float> @llvm.vp.ceil.v16f32(<16 x float>, <16 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+declare double @llvm.vp.ceil.f64(double)
+declare <2 x double> @llvm.vp.ceil.v2f64(<2 x double>, <2 x i1>, i32)
+declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
+declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
+declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
+declare <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
+
+declare <2 x float> @llvm.vp.floor.v2f32(<2 x float>, <2 x i1>, i32)
+declare <4 x float> @llvm.vp.floor.v4f32(<4 x float>, <4 x i1>, i32)
+declare <8 x float> @llvm.vp.floor.v8f32(<8 x float>, <8 x i1>, i32)
+declare <16 x float> @llvm.vp.floor.v16f32(<16 x float>, <16 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+declare double @llvm.vp.floor.f64(double)
+declare <2 x double> @llvm.vp.floor.v2f64(<2 x double>, <2 x i1>, i32)
+declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
+declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
+declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
+declare <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
+
+declare <2 x float> @llvm.vp.round.v2f32(<2 x float>, <2 x i1>, i32)
+declare <4 x float> @llvm.vp.round.v4f32(<4 x float>, <4 x i1>, i32)
+declare <8 x float> @llvm.vp.round.v8f32(<8 x float>, <8 x i1>, i32)
+declare <16 x float> @llvm.vp.round.v16f32(<16 x float>, <16 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+declare double @llvm.vp.round.f64(double)
+declare <2 x double> @llvm.vp.round.v2f64(<2 x double>, <2 x i1>, i32)
+declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
+declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
+declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
+declare <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
+
+declare <2 x float> @llvm.vp.roundeven.v2f32(<2 x float>, <2 x i1>, i32)
+declare <4 x float> @llvm.vp.roundeven.v4f32(<4 x float>, <4 x i1>, i32)
+declare <8 x float> @llvm.vp.roundeven.v8f32(<8 x float>, <8 x i1>, i32)
+declare <16 x float> @llvm.vp.roundeven.v16f32(<16 x float>, <16 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+declare double @llvm.vp.roundeven.f64(double)
+declare <2 x double> @llvm.vp.roundeven.v2f64(<2 x double>, <2 x i1>, i32)
+declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
+declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
+declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
+declare <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
+
+declare <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float>, <2 x i1>, i32)
+declare <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float>, <4 x i1>, i32)
+declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x i1>, i32)
+declare <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float>, <16 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.roundtozero.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+declare double @llvm.vp.roundtozero.f64(double)
+declare <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double>, <2 x i1>, i32)
+declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)
+declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)
+declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)
+declare <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
+
 declare <2 x float> @llvm.vp.rint.v2f32(<2 x float>, <2 x i1>, i32)
 declare <4 x float> @llvm.vp.rint.v4f32(<4 x float>, <4 x i1>, i32)
 declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32)