diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1288,15 +1288,11 @@
   case Intrinsic::vector_reduce_fmin:
   case Intrinsic::vector_reduce_umax:
   case Intrinsic::vector_reduce_umin: {
-    if (isa<ScalableVectorType>(RetTy))
-      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
     IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
     return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
   }
   case Intrinsic::vector_reduce_fadd:
   case Intrinsic::vector_reduce_fmul: {
-    if (isa<ScalableVectorType>(RetTy))
-      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
     IntrinsicCostAttributes Attrs(
         IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
     return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -139,6 +139,14 @@
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
 
+  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                             bool IsPairwise, bool IsUnsigned,
+                             TTI::TargetCostKind CostKind);
+
+  int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
+                                    bool IsPairwiseForm,
+                                    TTI::TargetCostKind CostKind);
+
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1101,11 +1101,70 @@
   return false;
 }
 
+// Cost of a min/max reduction. Scalar (fixed-width) types are delegated to
+// the base implementation; scalable types are costed as a fixed horizontal
+// reduction plus the cmp+select chain needed to legalize oversized vectors.
+int AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                                           bool IsPairwise, bool IsUnsigned,
+                                           TTI::TargetCostKind CostKind) {
+  if (!isa<ScalableVectorType>(Ty))
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
+                                         CostKind);
+  assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
+         "Both vectors need to be scalable");
+
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  int LegalizationCost = 0;
+  if (LT.first > 1) {
+    // Each legalization step combines two parts with a compare + select.
+    Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
+    unsigned CmpOpcode =
+        Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
+    LegalizationCost =
+        getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
+                           CmpInst::BAD_ICMP_PREDICATE, CostKind) +
+        getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
+                           CmpInst::BAD_ICMP_PREDICATE, CostKind);
+    LegalizationCost *= LT.first - 1;
+  }
+
+  return LegalizationCost + /*Cost of horizontal reduction*/ 2;
+}
+
+// Cost of an SVE arithmetic reduction: legalization (one arithmetic op per
+// extra legal part) plus a fixed cost for the final horizontal reduction.
+int AArch64TTIImpl::getArithmeticReductionCostSVE(
+    unsigned Opcode, VectorType *ValTy, bool IsPairwise,
+    TTI::TargetCostKind CostKind) {
+  assert(!IsPairwise && "Cannot be pairwise");
+
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  int LegalizationCost = 0;
+  if (LT.first > 1) {
+    Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
+    LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind);
+    LegalizationCost *= LT.first - 1;
+  }
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+  // Add the final reduction cost for the legal horizontal reduction
+  switch (ISD) {
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::FADD:
+    return LegalizationCost + 2;
+  default:
+    // TODO: Return an invalid cost once InstructionCost is in use.
+    // These cases are not supported by SVE.
+    return 16;
+  }
+}
+
 int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
                                                VectorType *ValTy,
                                                bool IsPairwiseForm,
                                                TTI::TargetCostKind CostKind) {
 
+  if (isa<ScalableVectorType>(ValTy))
+    return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm,
+                                         CostKind);
   if (IsPairwiseForm)
     return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
                                              CostKind);
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reduce.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reduce.ll
@@ -0,0 +1,307 @@
+; Check getIntrinsicInstrCost in BasicTTIImpl.h with SVE for vector.reduce.<operand>
+; Checks both legal and illegal vector sizes
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define i32 @add.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'add.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define i32 @mul.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'mul.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define i32 @and.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'and.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define i32 @or.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'or.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define i32 @xor.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'xor.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define i32 @umin.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'umin.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define float @fmax.f32.nxv4f32(<vscale x 4 x float> %v) {
+; CHECK-LABEL: 'fmax.f32.nxv4f32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
+
+  %r = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+  ret float %r
+}
+
+define i32 @fmax.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'fmax.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.fmax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.fmax.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define float @fmin.f32.nxv4f32(<vscale x 4 x float> %v) {
+; CHECK-LABEL: 'fmin.f32.nxv4f32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
+
+  %r = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+  ret float %r
+}
+
+define i32 @fmin.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'fmin.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.fmin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.fmin.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define i32 @umax.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'umax.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+define i32 @smin.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'smin.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+define i32 @smax.i32.nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: 'smax.i32.nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %r
+}
+
+define float @fadda_nxv4f32(float %start, <vscale x 4 x float> %a) {
+; CHECK-LABEL: 'fadda_nxv4f32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %start, <vscale x 4 x float> %a)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %res
+
+  %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %start, <vscale x 4 x float> %a)
+  ret float %res
+}
+
+define i32 @fadda_nxv4i32(i32 %start, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: 'fadda_nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call i32 @llvm.vector.reduce.fadd.nxv4i32(i32 %start, <vscale x 4 x i32> %a)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+
+  %res = call i32 @llvm.vector.reduce.fadd.nxv4i32(i32 %start, <vscale x 4 x i32> %a)
+  ret i32 %res
+}
+; Test legalization cost
+
+define i64 @add.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'add.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+
+define i64 @mul.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'mul.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r = call i64 @llvm.vector.reduce.mul.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.mul.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+
+define i64 @and.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'and.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = call i64 @llvm.vector.reduce.and.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.and.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+
+define i64 @or.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'or.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+
+define i64 @xor.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'xor.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = call i64 @llvm.vector.reduce.xor.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.xor.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+
+define i64 @umin.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'umin.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = call i64 @llvm.vector.reduce.umin.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.umin.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+
+define float @fmax.f32.nxv8f32(<vscale x 8 x float> %v) {
+; CHECK-LABEL: 'fmax.f32.nxv8f32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
+
+  %r = call float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float> %v)
+  ret float %r
+}
+
+define i32 @fmax.i32.nxv8i32(<vscale x 8 x i32> %v) {
+; CHECK-LABEL: 'fmax.i32.nxv8i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call i32 @llvm.vector.reduce.fmax.nxv8i32(<vscale x 8 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.fmax.nxv8i32(<vscale x 8 x i32> %v)
+  ret i32 %r
+}
+
+define float @fmin.f32.nxv8f32(<vscale x 8 x float> %v) {
+; CHECK-LABEL: 'fmin.f32.nxv8f32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
+
+  %r = call float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %v)
+  ret float %r
+}
+
+define i32 @fmin.i32.nxv8i32(<vscale x 8 x i32> %v) {
+; CHECK-LABEL: 'fmin.i32.nxv8i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call i32 @llvm.vector.reduce.fmin.nxv8i32(<vscale x 8 x i32> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+
+  %r = call i32 @llvm.vector.reduce.fmin.nxv8i32(<vscale x 8 x i32> %v)
+  ret i32 %r
+}
+
+define i64 @umax.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'umax.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = call i64 @llvm.vector.reduce.umax.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.umax.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+define i64 @smin.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'smin.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = call i64 @llvm.vector.reduce.smin.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.smin.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+define i64 @smax.i64.nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: 'smax.i64.nxv8i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %v)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
+
+  %r = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %v)
+  ret i64 %r
+}
+
+define float @fadda_nxv8f32(float %start, <vscale x 8 x float> %a) {
+; CHECK-LABEL: 'fadda_nxv8f32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call float @llvm.vector.reduce.fadd.nxv8f32(float %start, <vscale x 8 x float> %a)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %res
+
+  %res = call float @llvm.vector.reduce.fadd.nxv8f32(float %start, <vscale x 8 x float> %a)
+  ret float %res
+}
+
+define i32 @fadda_nxv8i32(i32 %start, <vscale x 8 x i32> %a) {
+; CHECK-LABEL: 'fadda_nxv8i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call i32 @llvm.vector.reduce.fadd.nxv8i32(i32 %start, <vscale x 8 x i32> %a)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+
+  %res = call i32 @llvm.vector.reduce.fadd.nxv8i32(i32 %start, <vscale x 8 x i32> %a)
+  ret i32 %res
+}
+
+declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
+declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
+declare i32 @llvm.vector.reduce.fmax.nxv4i32(<vscale x 4 x i32>)
+declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
+declare i32 @llvm.vector.reduce.fmin.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
+declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
+declare i32 @llvm.vector.reduce.fadd.nxv4i32(i32, <vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64>)
+declare i64 @llvm.vector.reduce.mul.nxv8i64(<vscale x 8 x i64>)
+declare i64 @llvm.vector.reduce.and.nxv8i64(<vscale x 8 x i64>)
+declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)
+declare i64 @llvm.vector.reduce.xor.nxv8i64(<vscale x 8 x i64>)
+declare float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float>)
+declare float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float>)
+declare i32 @llvm.vector.reduce.fmax.nxv8i32(<vscale x 8 x i32>)
+declare i32 @llvm.vector.reduce.fmin.nxv8i32(<vscale x 8 x i32>)
+declare i64 @llvm.vector.reduce.umin.nxv8i64(<vscale x 8 x i64>)
+declare i64 @llvm.vector.reduce.umax.nxv8i64(<vscale x 8 x i64>)
+declare i64 @llvm.vector.reduce.smin.nxv8i64(<vscale x 8 x i64>)
+declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)
+declare float @llvm.vector.reduce.fadd.nxv8f32(float, <vscale x 8 x float>)
+declare i32 @llvm.vector.reduce.fadd.nxv8i32(i32, <vscale x 8 x i32>)