diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -88,6 +88,14 @@
                                          TTI::TargetCostKind CostKind,
                                          const Instruction *I);
 
+  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                                         bool IsUnsigned,
+                                         TTI::TargetCostKind CostKind);
+
+  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+                                             Optional<FastMathFlags> FMF,
+                                             TTI::TargetCostKind CostKind);
+
   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
     if (!ST->hasVInstructions())
       return false;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -217,6 +217,78 @@
   return NumLoads * MemOpCost;
 }
 
+/// Estimate the cost of a min/max reduction over \p Ty.
+///
+/// The generic BaseT estimate is returned unless \p Ty is a fixed-length
+/// vector, RVV is used for fixed-length vectors, and the scalar size does not
+/// exceed the maximum ELEN for fixed-length vectors. Otherwise the cost is
+///   (LT.first - 1) + 2 + ceil(log2(NumElements)).
+InstructionCost
+RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                                     bool IsUnsigned,
+                                     TTI::TargetCostKind CostKind) {
+  // FIXME: Only supporting fixed vectors for now.
+  if (!isa<FixedVectorType>(Ty))
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+
+  if (!ST->useRVVForFixedLengthVectors())
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+
+  // Skip if the scalar size of Ty is bigger than ELEN.
+  if (Ty->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+
+  // An IR reduction is composed of two vmv and one RVV reduction instruction.
+  InstructionCost BaseCost = 2;
+  unsigned VL = cast<FixedVectorType>(Ty)->getNumElements();
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+}
+
+/// Estimate the cost of an arithmetic reduction with \p Opcode over \p VTy.
+///
+/// The generic BaseT estimate is returned for vectors that are not
+/// fixed-length, for i1 elements, when RVV is not used for fixed-length
+/// vectors, for scalar sizes above the maximum ELEN, and for opcodes other
+/// than add/or/xor/and/fadd. Otherwise the cost is (LT.first - 1) + 2 + N,
+/// where N is NumElements if \p FMF requires an ordered reduction and
+/// ceil(log2(NumElements)) if it does not.
+InstructionCost
+RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *VTy,
+                                         Optional<FastMathFlags> FMF,
+                                         TTI::TargetCostKind CostKind) {
+  // FIXME: Only supporting fixed vectors for now.
+  if (!isa<FixedVectorType>(VTy))
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  // FIXME: Do not support i1 and/or reduction now.
+  if (VTy->getElementType()->isIntegerTy(1))
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  if (!ST->useRVVForFixedLengthVectors())
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  // Skip if the scalar size of VTy is bigger than ELEN.
+  if (VTy->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR && ISD != ISD::AND &&
+      ISD != ISD::FADD)
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  // An IR reduction is composed of two vmv and one RVV reduction instruction.
+  InstructionCost BaseCost = 2;
+  unsigned VL = cast<FixedVectorType>(VTy)->getNumElements();
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VTy);
+
+  if (TTI::requiresOrderedReduction(FMF))
+    return (LT.first - 1) + BaseCost + VL;
+  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+}
+
 void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                            TTI::UnrollingPreferences &UP,
                                            OptimizationRemarkEmitter *ORE) {
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
+
+define i32 @reduce_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i8'
+; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for
instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i8' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) + %V128 = call i8 
@llvm.vector.reduce.add.v128i8(<128 x i8> undef) + ret i32 undef +} + +define i32 @reduce_i16(i32 %arg) { +; RISCV32-LABEL: 'reduce_i16' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i16' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 
= call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) + %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) + ret i32 undef +} + +define i32 @reduce_i32(i32 %arg) { +; RISCV32-LABEL: 'reduce_i32' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 
@llvm.vector.reduce.add.v32i32(<32 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i32' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) + %V32 = call 
i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef) + %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) + %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) + ret i32 undef +} + +define i32 @reduce_i64(i32 %arg) { +; RISCV32-LABEL: 'reduce_i64' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i64' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: 
%V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) + %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) + %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) + %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef) + ret i32 undef +} + +declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) +declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>) +declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 
@llvm.vector.reduce.add.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>) +declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>) +declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.add.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>) +declare i64 @llvm.vector.reduce.add.v128i64(<128 x i64>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll --- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=riscv32 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32 -; RUN: opt < %s -mtriple=riscv64 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64 +; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32 +; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64 define i32 @reduce_i1(i32 %arg) { ; 
RISCV32-LABEL: 'reduce_i1' @@ -36,6 +36,142 @@ ret i32 undef } +define i32 @reduce_i8(i32 %arg) { +; RISCV32-LABEL: 'reduce_i8' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i8' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x 
i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef) + ret i32 undef +} + +define i32 @reduce_i16(i32 %arg) { +; RISCV32-LABEL: 'reduce_i16' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i16' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> 
undef) + %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef) + ret i32 undef +} + +define i32 @reduce_i32(i32 %arg) { +; RISCV32-LABEL: 'reduce_i32' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i32' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated 
cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) + %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef) + %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef) + ret i32 undef +} + +define i32 @reduce_i64(i32 %arg) { +; RISCV32-LABEL: 'reduce_i64' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call 
i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i64' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef) + %V32 = 
call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef) + %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef) + %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef) + ret i32 undef +} + declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>) declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>) declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>) @@ -44,3 +180,35 @@ declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>) declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>) declare i1 @llvm.vector.reduce.and.v128i1(<128 x i1>) +declare i8 @llvm.vector.reduce.and.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>) +declare i16 @llvm.vector.reduce.and.v1i16(<1 x i16>) +declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.and.v128i16(<128 x i16>) +declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32>) +declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.and.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.and.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.and.v4i64(<4 
x i64>) +declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.and.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.and.v64i64(<64 x i64>) +declare i64 @llvm.vector.reduce.and.v128i64(<128 x i64>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=FP-REDUCE + +define void @reduce_fadd_half() { +; FP-REDUCE-LABEL: 'reduce_fadd_half' +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) +; FP-REDUCE-NEXT: Cost
Model: Found an estimated cost of 9 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void + %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) + %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) + %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) + %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) + %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) + %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) + %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) + ret void +} + +define void @reduce_fadd_float() { +; FP-REDUCE-LABEL: 'reduce_fadd_float' +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x 
float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void + %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) + %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) + %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) + %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) + %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef) + %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) + %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) + %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) + ret void +} + +define void @reduce_fadd_double() { +; FP-REDUCE-LABEL: 'reduce_fadd_double' +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void + %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) + %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) + %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) + %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef) + %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) + %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) + %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) + %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) + ret void +} + +define void @reduce_oredered_fadd_half() { +; FP-REDUCE-LABEL: 'reduce_oredered_fadd_half' +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half
0xH0000, <4 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void + %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) + %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) + %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) + %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) + %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) + %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) + %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) + ret void +} + +define void @reduce_oredered_fadd_float() { +; FP-REDUCE-LABEL: 'reduce_oredered_fadd_float' +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +;
FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void + %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) + %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) + %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) + %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) + %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef) + %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) + %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) + %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) + ret void +} + +define void @reduce_oredered_fadd_double() { +; FP-REDUCE-LABEL: 'reduce_oredered_fadd_double' +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 
0.000000e+00, <1 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void + %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) + %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) + %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) + %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef) + %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) + %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) + %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) + %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) + ret void 
+} + +declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) +declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>) +declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) +declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>) +declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>) +declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>) +declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>) +declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>) +declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>) +declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) +declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) +declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>) +declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>) +declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>) +declare float @llvm.vector.reduce.fadd.v128f32(float, <128 x float>) +declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>) +declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) +declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>) +declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) +declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>) +declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>) +declare double @llvm.vector.reduce.fadd.v128f64(double, <128 x double>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll @@ -0,0 +1,340 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 
-passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32 +; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64 + +define i32 @reduce_umax_i8(i32 %arg) { +; RISCV32-LABEL: 'reduce_umax_i8' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_umax_i8' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) +; RISCV64-NEXT: Cost Model:
Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) + ret i32 undef +} + +define i32 @reduce_umax_i16(i32 %arg) { +; RISCV32-LABEL: 'reduce_umax_i16' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = 
call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_umax_i16' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) + %V4 = call i16 
@llvm.vector.reduce.umax.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) + %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef) + ret i32 undef +} + +define i32 @reduce_umax_i32(i32 %arg) { +; RISCV32-LABEL: 'reduce_umax_i32' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_umax_i32' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x 
i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) + %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef) + %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef) + ret i32 undef +} + +define i32 @reduce_umax_i64(i32 %arg) { +; RISCV32-LABEL: 'reduce_umax_i64' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; 
RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_umax_i64' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret i32 undef +; + %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) + %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef) + %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef) + %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef) + ret i32 undef +} + +define i32 @reduce_smax_i8(i32 %arg) { +; RISCV32-LABEL: 'reduce_smax_i8' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_smax_i8' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 
@llvm.vector.reduce.smax.v1i8(<1 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) + ret i32 undef +} + +define i32 @reduce_smax_i16(i32 %arg) { +; RISCV32-LABEL: 'reduce_smax_i16' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; RISCV32-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_smax_i16' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) + %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) + ret i32 undef +} + +define i32 @reduce_smax_i32(i32 %arg) { +; RISCV32-LABEL: 'reduce_smax_i32' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_smax_i32' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) + %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) + %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) + ret i32 undef +} + +define i32 @reduce_smax_i64(i32 %arg) { +; RISCV32-LABEL: 'reduce_smax_i64' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call 
i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_smax_i64' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) +; 
RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) + %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) + %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef) + %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef) + ret i32 undef +} + +declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>) +declare i16 @llvm.vector.reduce.umax.v1i16(<1 x i16>) +declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.umax.v128i16(<128 x i16>) +declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32>) +declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) +declare i32 
@llvm.vector.reduce.umax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.umax.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.umax.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.umax.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.umax.v64i64(<64 x i64>) +declare i64 @llvm.vector.reduce.umax.v128i64(<128 x i64>) +declare i8 @llvm.vector.reduce.smax.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>) +declare i16 @llvm.vector.reduce.smax.v1i16(<1 x i16>) +declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.smax.v128i16(<128 x i16>) +declare i32 @llvm.vector.reduce.smax.v1i32(<1 x i32>) +declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.smax.v64i32(<64 x i32>) +declare 
i32 @llvm.vector.reduce.smax.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.smax.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.smax.v64i64(<64 x i64>) +declare i64 @llvm.vector.reduce.smax.v128i64(<128 x i64>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll @@ -0,0 +1,340 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32 +; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64 + +define i32 @reduce_umin_i8(i32 %arg) { +; RISCV32-LABEL: 'reduce_umin_i8' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 
@llvm.vector.reduce.umin.v32i8(<32 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_umin_i8' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) + %V32 = call i8 
@llvm.vector.reduce.umin.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) + ret i32 undef +} + +define i32 @reduce_umin_i16(i32 %arg) { +; RISCV32-LABEL: 'reduce_umin_i16' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_umin_i16' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 
for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) + %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef) + ret i32 undef +} + +define i32 @reduce_umin_i32(i32 %arg) { +; RISCV32-LABEL: 'reduce_umin_i32' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_umin_i32' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) + %V4 = call i32 
@llvm.vector.reduce.umin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) + %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef) + %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef) + ret i32 undef +} + +define i32 @reduce_umin_i64(i32 %arg) { +; RISCV32-LABEL: 'reduce_umin_i64' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_umin_i64' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x 
i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) + %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef) + %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef) + %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef) + ret i32 undef +} + +define i32 @reduce_smin_i8(i32 %arg) { +; RISCV32-LABEL: 'reduce_smin_i8' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; 
RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_smin_i8' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 
+; + %V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) + ret i32 undef +} + +define i32 @reduce_smin_i16(i32 %arg) { +; RISCV32-LABEL: 'reduce_smin_i16' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_smin_i16' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef) +; 
RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) + %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef) + ret i32 undef +} + +define i32 @reduce_smin_i32(i32 %arg) { +; RISCV32-LABEL: 'reduce_smin_i32' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; RISCV32-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_smin_i32' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) + %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef) + %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef) + ret i32 undef +} + +define i32 @reduce_smin_i64(i32 %arg) { +; RISCV32-LABEL: 'reduce_smin_i64' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_smin_i64' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) + %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef) + %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef) + %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef) + ret i32 undef +} + +declare i8 @llvm.vector.reduce.umin.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>) +declare 
i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>) +declare i16 @llvm.vector.reduce.umin.v1i16(<1 x i16>) +declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.umin.v128i16(<128 x i16>) +declare i32 @llvm.vector.reduce.umin.v1i32(<1 x i32>) +declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.umin.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.umin.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.umin.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.umin.v64i64(<64 x i64>) +declare i64 @llvm.vector.reduce.umin.v128i64(<128 x i64>) +declare i8 @llvm.vector.reduce.smin.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>) +declare i8 
@llvm.vector.reduce.smin.v128i8(<128 x i8>) +declare i16 @llvm.vector.reduce.smin.v1i16(<1 x i16>) +declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.smin.v128i16(<128 x i16>) +declare i32 @llvm.vector.reduce.smin.v1i32(<1 x i32>) +declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.smin.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.smin.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.smin.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.smin.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.smin.v64i64(<64 x i64>) +declare i64 @llvm.vector.reduce.smin.v128i64(<128 x i64>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll --- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=riscv32 -passes='print' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32 -; RUN: opt < %s -mtriple=riscv64 -passes='print' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64 +; RUN: opt < %s 
-mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32 +; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64 define i32 @reduce_i1(i32 %arg) { ; RISCV32-LABEL: 'reduce_i1' @@ -36,6 +36,142 @@ ret i32 undef } +define i32 @reduce_i8(i32 %arg) { +; RISCV32-LABEL: 'reduce_i8' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i8' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) + ret i32 undef +} + +define i32 @reduce_i16(i32 %arg) { +; RISCV32-LABEL: 'reduce_i16' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) +; 
RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i16' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) 
+ %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) + %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef) + ret i32 undef +} + +define i32 @reduce_i32(i32 %arg) { +; RISCV32-LABEL: 'reduce_i32' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i32' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) +; RISCV64-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) + %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef) + %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef) + ret i32 undef +} + +define i32 @reduce_i64(i32 %arg) { +; RISCV32-LABEL: 'reduce_i64' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: 
%V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i64' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> 
undef) + %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef) + %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef) + %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef) + %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef) + ret i32 undef +} + declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>) declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>) declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>) @@ -44,3 +180,35 @@ declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>) declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>) declare i1 @llvm.vector.reduce.or.v128i1(<128 x i1>) +declare i8 @llvm.vector.reduce.or.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>) +declare i16 @llvm.vector.reduce.or.v1i16(<1 x i16>) +declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.or.v128i16(<128 x i16>) +declare i32 @llvm.vector.reduce.or.v1i32(<1 x i32>) +declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>) +declare i32 
@llvm.vector.reduce.or.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.or.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.or.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.or.v64i64(<64 x i64>) +declare i64 @llvm.vector.reduce.or.v128i64(<128 x i64>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll @@ -0,0 +1,172 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32 +; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64 + +define i32 @reduce_i8(i32 %arg) { +; RISCV32-LABEL: 'reduce_i8' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = 
call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i8' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef) + %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> 
undef) + %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef) + ret i32 undef +} + +define i32 @reduce_i16(i32 %arg) { +; RISCV32-LABEL: 'reduce_i16' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i16' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x 
i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef) + %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef) + %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef) + ret i32 undef +} + +define i32 @reduce_i32(i32 %arg) { +; RISCV32-LABEL: 'reduce_i32' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef) +; RISCV32-NEXT: Cost Model: 
Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i32' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef) + %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) + %V16 = call i32 
@llvm.vector.reduce.xor.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef) + %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef) + %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef) + ret i32 undef +} + +define i32 @reduce_i64(i32 %arg) { +; RISCV32-LABEL: 'reduce_i64' +; RISCV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef) +; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; RISCV64-LABEL: 'reduce_i64' +; RISCV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef) +; 
RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef) +; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef) + %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef) + %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef) + %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef) + ret i32 undef +} + +declare i8 @llvm.vector.reduce.xor.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>) +declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>) +declare i16 @llvm.vector.reduce.xor.v1i16(<1 x i16>) +declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>) +declare i16 
@llvm.vector.reduce.xor.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>) +declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>) +declare i16 @llvm.vector.reduce.xor.v128i16(<128 x i16>) +declare i32 @llvm.vector.reduce.xor.v1i32(<1 x i32>) +declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>) +declare i32 @llvm.vector.reduce.xor.v64i32(<64 x i32>) +declare i32 @llvm.vector.reduce.xor.v128i32(<128 x i32>) +declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.xor.v32i64(<32 x i64>) +declare i64 @llvm.vector.reduce.xor.v64i64(<64 x i64>) +declare i64 @llvm.vector.reduce.xor.v128i64(<128 x i64>)