Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -637,8 +637,17 @@ } return Cost; - case ISD::ADD: case ISD::MUL: + { + // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive + // as elements are extracted from the vectors and the muls scalarized. + auto *VecTy = dyn_cast<FixedVectorType>(Ty); + bool IsInt64 = Ty->getScalarType()->isIntegerTy(64); + if (VecTy && IsInt64) + return 1 * VecTy->getNumElements() + VecTy->getNumElements(); + return (Cost + 1) * LT.first; + } + case ISD::ADD: case ISD::XOR: case ISD::OR: case ISD::AND: Index: llvm/test/Analysis/CostModel/AArch64/mul.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/mul.ll +++ llvm/test/Analysis/CostModel/AArch64/mul.ll @@ -113,7 +113,7 @@ define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b) { ; THROUGHPUT-LABEL: 't13' -; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = mul nsw <2 x i64> %a, %b +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = mul nsw <2 x i64> %a, %b ; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %1 ; %1 = mul nsw <2 x i64> %a, %b @@ -122,7 +122,7 @@ define <4 x i64> @t14(<4 x i64> %a, <4 x i64> %b) { ; THROUGHPUT-LABEL: 't14' -; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = mul nsw <4 x i64> %a, %b +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = mul nsw <4 x i64> %a, %b ; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 ; %1 = mul nsw <4 x i64> %a, %b Index: llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll =================================================================== --- 
llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -9,8 +9,8 @@ ; leaving cost 3 for scalarizing the result + 2 for executing the op with VF 2. ; CM: LV: Scalar loop costs: 7. -; CM: LV: Found an estimated cost of 5 for VF 2 For instruction: %a = extractvalue { i64, i64 } %sv, 0 -; CM-NEXT: LV: Found an estimated cost of 5 for VF 2 For instruction: %b = extractvalue { i64, i64 } %sv, 1 +; CM: LV: Found an estimated cost of 11 for VF 2 For instruction: %a = extractvalue { i64, i64 } %sv, 0 +; CM-NEXT: LV: Found an estimated cost of 11 for VF 2 For instruction: %b = extractvalue { i64, i64 } %sv, 1 ; Check that the extractvalue operands are actually free in vector code.