diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1289,26 +1289,32 @@ // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the // instructions. // - // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we + // TODO: Add additional widening operations (e.g., shl, etc.) once we // verify that their extending operands are eliminated during code // generation. switch (Opcode) { case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2). case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2). + case Instruction::Mul: // SMULL(2), UMULL(2) break; default: return false; } // To be a widening instruction (either the "wide" or "long" versions), the - // second operand must be a sign- or zero extend having a single user. We - // only consider extends having a single user because they may otherwise not - // be eliminated. + // second operand must be a sign- or zero extend. if (Args.size() != 2 || - (!isa(Args[1]) && !isa(Args[1])) || - !Args[1]->hasOneUse()) + (!isa(Args[1]) && !isa(Args[1]))) return false; auto *Extend = cast(Args[1]); + auto *Arg0 = dyn_cast(Args[0]); + + // A mul only has a mull version (not like addw). Both operands need to be + // extending and the same type. + if (Opcode == Instruction::Mul && + (!Arg0 || Arg0->getOpcode() != Extend->getOpcode() || + Arg0->getOperand(0)->getType() != Extend->getOperand(0)->getType())) + return false; // Legalize the destination type and ensure it can be used in a widening // operation. @@ -1346,7 +1352,7 @@ // If the cast is observable, and it is used by a widening instruction (e.g., // uaddl, saddw, etc.), it may be free. - if (I && I->hasOneUse()) { + if (I && I->hasOneUser()) { auto *SingleUser = cast(*I->user_begin()); SmallVector Operands(SingleUser->operand_values()); if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) { @@ -1831,8 +1837,6 @@ return Cost; } case ISD::MUL: - if (LT.second != MVT::v2i64) - return LT.first; // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive // as elements are extracted from the vectors and the muls scalarized. // As getScalarizationOverhead is a bit too pessimistic, we estimate the @@ -1841,7 +1845,10 @@ // - two i64 inserts, and // - two muls. // So, for a v2i64 with LT.First = 1 the cost is 8, and for a v4i64 with - // LT.first = 2 the cost is 16. + // LT.first = 2 the cost is 16. If both operands are extensions it will not + // need to scalarize so the cost can be cheaper (smull or umull). + if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args)) + return LT.first; return LT.first * 8; case ISD::ADD: case ISD::XOR: diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll --- a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll @@ -1585,14 +1585,14 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %azl_16_64 = mul <2 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %asw_32_64 = mul <2 x i64> %i64, %sw_32_64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %asl_32_64 = mul <2 x i64> %sl1_32_64, %sl2_32_64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul <2 x i64> %sl1_32_64, %sl2_32_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %azw_32_64 = mul <2 x i64> %i64, %zw_32_64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %azl_32_64 = mul <2 x i64> %zl1_32_64, %zl2_32_64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul <2 x i64> %zl1_32_64, %zl2_32_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %sw_8_16 = sext <2 x i8> %i8 to <2 x i16> @@ -1704,13 +1704,13 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %azl_8_64 = mul <4 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <4 x i32> %i32, %sw_16_32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <4 x i32> %sl1_16_32, %sl2_16_32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <4 x i32> %i32, %zw_16_32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <4 x i32> %zl1_16_32, %zl2_16_32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %asw_16_64 = mul <4 x i64> %i64, %sw_16_64 @@ -1724,14 +1724,14 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %azl_16_64 = mul <4 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %asw_32_64 = mul <4 x i64> %i64, %sw_32_64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %asl_32_64 = mul <4 x i64> %sl1_32_64, %sl2_32_64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %asl_32_64 = mul <4 x i64> %sl1_32_64, %sl2_32_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %azw_32_64 = mul <4 x i64> %i64, %zw_32_64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %azl_32_64 = mul <4 x i64> %zl1_32_64, %zl2_32_64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %azl_32_64 = mul <4 x i64> %zl1_32_64, %zl2_32_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %sw_8_16 = sext <4 x i8> %i8 to <4 x i16> @@ -1813,13 +1813,13 @@ ; CHECK-LABEL: 'extmulv8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <8 x i16> %i16, %sw_8_16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, %sl2_8_16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <8 x i16> %i16, %zw_8_16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, %zl2_8_16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %asw_8_32 = mul <8 x i32> %i32, %sw_8_32 @@ -1843,13 +1843,13 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %azl_8_64 = mul <8 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %asw_16_32 = mul <8 x i32> %i32, %sw_16_32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %asl_16_32 = mul <8 x i32> %sl1_16_32, %sl2_16_32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %azw_16_32 = mul <8 x i32> %i32, %zw_16_32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %azl_16_32 = mul <8 x i32> %zl1_16_32, %zl2_16_32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %asw_16_64 = mul <8 x i64> %i64, %sw_16_64 @@ -1863,14 +1863,14 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %azl_16_64 = mul <8 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %asw_32_64 = mul <8 x i64> %i64, %sw_32_64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %asl_32_64 = mul <8 x i64> %sl1_32_64, %sl2_32_64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %asl_32_64 = mul <8 x i64> %sl1_32_64, %sl2_32_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %azw_32_64 = mul <8 x i64> %i64, %zw_32_64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %azl_32_64 = mul <8 x i64> %zl1_32_64, %zl2_32_64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %azl_32_64 = mul <8 x i64> %zl1_32_64, %zl2_32_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %sw_8_16 = sext <8 x i8> %i8 to <8 x i16> @@ -1952,13 +1952,13 @@ ; CHECK-LABEL: 'extmulv16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %asw_8_16 = mul <16 x i16> %i16, %sw_8_16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %asl_8_16 = mul <16 x i16> %sl1_8_16, %sl2_8_16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %azw_8_16 = mul <16 x i16> %i16, %zw_8_16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %azl_8_16 = mul <16 x i16> %zl1_8_16, %zl2_8_16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %asw_8_32 = mul <16 x i32> %i32, %sw_8_32 @@ -1982,13 +1982,13 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %azl_8_64 = mul <16 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %asw_16_32 = mul <16 x i32> %i32, %sw_16_32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %asl_16_32 = mul <16 x i32> %sl1_16_32, %sl2_16_32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %azw_16_32 = mul <16 x i32> %i32, %zw_16_32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %azl_16_32 = mul <16 x i32> %zl1_16_32, %zl2_16_32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %asw_16_64 = mul <16 x i64> %i64, %sw_16_64 @@ -2002,14 +2002,14 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %azl_16_64 = mul <16 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %asw_32_64 = mul <16 x i64> %i64, %sw_32_64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %asl_32_64 = mul <16 x i64> %sl1_32_64, %sl2_32_64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %asl_32_64 = mul <16 x i64> %sl1_32_64, %sl2_32_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %azw_32_64 = mul <16 x i64> %i64, %zw_32_64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %azl_32_64 = mul <16 x i64> %zl1_32_64, %zl2_32_64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %azl_32_64 = mul <16 x i64> %zl1_32_64, %zl2_32_64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>