diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1202,14 +1202,11 @@ if (ICA.isTypeBasedOnly()) return getTypeBasedIntrinsicInstrCost(ICA, CostKind); - // TODO: Handle scalable vectors? Type *RetTy = ICA.getReturnType(); - if (isa<ScalableVectorType>(RetTy)) - return BaseT::getIntrinsicInstrCost(ICA, CostKind); ElementCount VF = ICA.getVectorFactor(); ElementCount RetVF = - (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getElementCount() + (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount() : ElementCount::getFixed(1)); assert((RetVF.isScalar() || VF.isScalar()) && "VF > 1 and RetVF is a vector type"); @@ -1238,21 +1235,25 @@ return thisT()->getMemcpyCost(ICA.getInst()); case Intrinsic::masked_scatter: { - assert(VF.isScalar() && "Can't vectorize types here."); - const Value *Mask = Args[3]; - bool VarMask = !isa<Constant>(Mask); - Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue(); - return thisT()->getGatherScatterOpCost(Instruction::Store, - Args[0]->getType(), Args[1], - VarMask, Alignment, CostKind, I); + if (!isa<ScalableVectorType>(RetTy)) { + assert(VF.isScalar() && "Can't vectorize types here."); + const Value *Mask = Args[3]; + bool VarMask = !isa<Constant>(Mask); + Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue(); + return thisT()->getGatherScatterOpCost(Instruction::Store, + Args[0]->getType(), Args[1], + VarMask, Alignment, CostKind, I); + } } case Intrinsic::masked_gather: { - assert(VF.isScalar() && "Can't vectorize types here."); - const Value *Mask = Args[2]; - bool VarMask = !isa<Constant>(Mask); - Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue(); - return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], - VarMask, Alignment, CostKind, I); + if (!isa<ScalableVectorType>(RetTy)) { + assert(VF.isScalar() && "Can't vectorize types here."); + const Value *Mask = Args[2]; + bool VarMask = !isa<Constant>(Mask); + Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue(); + return 
thisT()->getGatherScatterOpCost( + Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I); + } } case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: @@ -1265,57 +1266,68 @@ case Intrinsic::vector_reduce_fmin: case Intrinsic::vector_reduce_umax: case Intrinsic::vector_reduce_umin: { - IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I); - return getTypeBasedIntrinsicInstrCost(Attrs, CostKind); + if (!isa<ScalableVectorType>(RetTy)) { + IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, + I); + return getTypeBasedIntrinsicInstrCost(Attrs, CostKind); + } } case Intrinsic::vector_reduce_fadd: case Intrinsic::vector_reduce_fmul: { - IntrinsicCostAttributes Attrs( - IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I); - return getTypeBasedIntrinsicInstrCost(Attrs, CostKind); + if (!isa<ScalableVectorType>(RetTy)) { + IntrinsicCostAttributes Attrs( + IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I); + return getTypeBasedIntrinsicInstrCost(Attrs, CostKind); + } } case Intrinsic::fshl: case Intrinsic::fshr: { - const Value *X = Args[0]; - const Value *Y = Args[1]; - const Value *Z = Args[2]; - TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; - TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); - TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); - TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); - TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; - OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? 
TTI::OP_PowerOf2 - : TTI::OP_None; - // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) - // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) - unsigned Cost = 0; - Cost += - thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind); - Cost += - thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind); - Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX); - Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY); - // Non-constant shift amounts requires a modulo. - if (OpKindZ != TTI::OK_UniformConstantValue && - OpKindZ != TTI::OK_NonUniformConstantValue) - Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy, - CostKind, OpKindZ, OpKindBW, - OpPropsZ, OpPropsBW); - // For non-rotates (X != Y) we must add shift-by-zero handling costs. - if (X != Y) { - Type *CondTy = RetTy->getWithNewBitWidth(1); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + if (!isa<ScalableVectorType>(RetTy)) { + const Value *X = Args[0]; + const Value *Y = Args[1]; + const Value *Z = Args[2]; + TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; + TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); + TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); + TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); + TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; + OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) + ? 
TTI::OP_PowerOf2 + : TTI::OP_None; + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + unsigned Cost = 0; + Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, + CostKind); + Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, + CostKind); + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX); + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY); + // Non-constant shift amounts requires a modulo. + if (OpKindZ != TTI::OK_UniformConstantValue && + OpKindZ != TTI::OK_NonUniformConstantValue) + Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy, + CostKind, OpKindZ, OpKindBW, + OpPropsZ, OpPropsBW); + // For non-rotates (X != Y) we must add shift-by-zero handling costs. + if (X != Y) { + Type *CondTy = RetTy->getWithNewBitWidth(1); + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, RetTy, CondTy, CmpInst::BAD_ICMP_PREDICATE, + CostKind); + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + } + return Cost; } - return Cost; } } + // TODO: Handle the remaining intrinsic with scalable vector type + if (isa<ScalableVectorType>(RetTy)) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); // Assume that we need to scalarize this intrinsic. 
SmallVector<Type *, 4> Types; diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll @@ -0,0 +1,31 @@ +; Checks getIntrinsicInstrCost in BasicTTIImpl.h with SVE for CTLZ and CTTZ + +; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s + +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning + +; Check for CTLZ + +define void @ctlz_nxv4i32(<vscale x 4 x i32> %A) { +; CHECK-LABEL: 'ctlz_nxv4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: + %1 = tail call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true) + ret void +} + +; Check for CTTZ + +define void @cttz_nxv4i32(<vscale x 4 x i32> %A) { +; CHECK-LABEL: 'cttz_nxv4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: + %1 = tail call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true) + ret void +} + +declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1) +declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)