Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -166,6 +166,9 @@
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
+
+  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+                                 bool IsPairwiseForm);
   /// @}
 };
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -884,3 +884,33 @@
   }
   return false;
 }
+
+/// Return the cost of reducing the vector \p ValTy with \p Opcode.
+/// A non-pairwise reduction that matches an entry of the table below costs
+/// that entry's value multiplied by the type-legalization cost (LT.first);
+/// every other case defers to BaseT::getArithmeticReductionCost.
+int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
+                                               bool IsPairwise) {
+
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  MVT MTy = LT.second;
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  // Horizontal adds can use the 'addv' instruction. We model the cost of these
+  // instructions as normal vector adds. This is the only arithmetic vector
+  // reduction operation for which we have an instruction.
+  static const CostTblEntry CostTblNoPairwise[]{
+      {ISD::ADD, MVT::v8i8, 1},
+      {ISD::ADD, MVT::v16i8, 1},
+      {ISD::ADD, MVT::v4i16, 1},
+      {ISD::ADD, MVT::v8i16, 1},
+      {ISD::ADD, MVT::v4i32, 1},
+  };
+
+  if (!IsPairwise)
+    if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
+      return LT.first * Entry->Cost;
+
+  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
+}
Index: test/Analysis/CostModel/AArch64/vector-reduce.ll
===================================================================
--- test/Analysis/CostModel/AArch64/vector-reduce.ll
+++ test/Analysis/CostModel/AArch64/vector-reduce.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
 ; COST-LABEL: add.i8.v8i8
-; COST: Found an estimated cost of 27 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
 ; CODE-LABEL: add.i8.v8i8
 ; CODE: addv b0, v0.8b
 define i8 @add.i8.v8i8(<8 x i8> %v) {
@@ -11,7 +11,7 @@
 }
 ; COST-LABEL: add.i8.v16i8
-; COST: Found an estimated cost of 53 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
 ; CODE-LABEL: add.i8.v16i8
 ; CODE: addv b0, v0.16b
 define i8 @add.i8.v16i8(<16 x i8> %v) {
@@ -20,7 +20,7 @@
 }
 ; COST-LABEL: add.i16.v4i16
-; COST: Found an estimated cost of 13 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
 ; CODE-LABEL: add.i16.v4i16
 ; CODE: addv h0, v0.4h
 define i16 @add.i16.v4i16(<4 x i16> %v) {
@@ -29,7 +29,7 @@
 }
 ; COST-LABEL: add.i16.v8i16
-; COST: Found an estimated cost of 27 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
 ; CODE-LABEL: add.i16.v8i16
 ; CODE: addv h0, v0.8h
 define i16 @add.i16.v8i16(<8 x i16> %v) {
@@ -38,7 +38,7 @@
 }
 ; COST-LABEL: add.i32.v4i32
-; COST: Found an estimated cost of 13 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
 ; CODE-LABEL: add.i32.v4i32
 ; CODE: addv s0, v0.4s
 define i32 @add.i32.v4i32(<4 x i32> %v) {
Index: test/Transforms/SLPVectorizer/AArch64/horizontal.ll
===================================================================
--- test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -21,7 +21,7 @@
 ; YAML-NEXT: Function: test_select
 ; YAML-NEXT: Args:
 ; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
-; YAML-NEXT: - Cost: '4'
+; YAML-NEXT: - Cost: '-8'
 ; YAML-NEXT: - String: ' and with tree size '
 ; YAML-NEXT: - TreeSize: '8'
@@ -115,7 +115,7 @@
 ; YAML-NEXT: Function: reduction_with_br
 ; YAML-NEXT: Args:
 ; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
-; YAML-NEXT: - Cost: '1'
+; YAML-NEXT: - Cost: '-11'
 ; YAML-NEXT: - String: ' and with tree size '
 ; YAML-NEXT: - TreeSize: '3'
@@ -183,7 +183,7 @@
 ; YAML-NEXT: Function: test_unrolled_select
 ; YAML-NEXT: Args:
 ; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
-; YAML-NEXT: - Cost: '-33'
+; YAML-NEXT: - Cost: '-47'
 ; YAML-NEXT: - String: ' and with tree size '
 ; YAML-NEXT: - TreeSize: '10'