diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -191,7 +191,7 @@ EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); // The extend of a load is free if (I && isa(I->getOperand(0))) { @@ -229,18 +229,53 @@ } } + // NEON vector operations that can extend their inputs. + if ((ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND) && + I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) { + static const TypeConversionCostTblEntry NEONDoubleWidthTbl[] = { + // vaddl + { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::ADD, MVT::v8i16, MVT::v8i8, 0 }, + // vsubl + { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::SUB, MVT::v8i16, MVT::v8i8, 0 }, + // vmull + { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::MUL, MVT::v8i16, MVT::v8i8, 0 }, + // vshll + { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::SHL, MVT::v8i16, MVT::v8i8, 0 }, + }; + + auto *User = cast(*I->user_begin()); + int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode()); + if (auto *Entry = ConvertCostTableLookup(NEONDoubleWidthTbl, UserISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) { + return Entry->Cost; + } + } + // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. // TODO: Get these tables to know at least what the related operations are. static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, - { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, // The number of vmovl instructions for the extension. + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, @@ -422,7 +457,7 @@ int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() ? ST->getMVEVectorCostFactor() : 1; - return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); + return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); } int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll rename from llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll rename to llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll @@ -20,9 +20,9 @@ ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = mul %T432 %v0, %v1 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmul.i32 + %r3 = add %T432 %v0, %v1 +; COST: cost of 1 for instruction: {{.*}} add <4 x i32> +; ASM: vadd.i32 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -37,9 +37,9 @@ %r1 = sext %T416 %v0 to %T432 %r2 = sext %T416 %v1 to %T432 ; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32> - %r3 = mul %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmull.s16 + %r3 = add %T432 %r1, %r2 +; COST: cost of 1 for instruction: {{.*}} add <4 x i32> +; ASM: vaddl.s16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -54,9 +54,9 @@ %r1 = zext %T416 %v0 to %T432 %r2 = zext %T416 %v1 to %T432 ; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32> - %r3 = mul %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmull.u16 + %r3 = add %T432 %r1, %r2 +; COST: cost of 1 for instruction: {{.*}} add <4 x i32> +; ASM: vaddl.u16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -68,9 +68,9 @@ ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = mul %T232 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 1 for instruction: {{.*}} mul <2 x i32> + %r3 = add %T232 %v0, %v1 +; ASM: vadd.i32 +; COST: cost of 1 for instruction: {{.*}} add <2 x i32> %st = sext %T232 %r3 to %T264 ; ASM: vmovl.s32 ; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64> @@ -85,9 +85,9 @@ ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = mul %T232 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 1 for instruction: {{.*}} mul <2 x i32> + %r3 = add %T232 %v0, %v1 +; ASM: vadd.i32 +; COST: cost of 1 for instruction: {{.*}} add <2 x i32> %st = zext %T232 %r3 to %T264 ; ASM: vmovl.u32 ; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64> @@ -102,9 +102,9 @@ ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = mul %T432 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> + %r3 = add %T432 %v0, %v1 +; ASM: vadd.i32 +; COST: cost of 1 for instruction: {{.*}} add <4 x i32> %st = trunc %T432 %r3 to %T416 ; ASM: vmovn.i32 ; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16> diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -77,14 +77,14 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> @@ -93,10 +93,10 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> @@ -1145,14 +1145,14 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> @@ -1161,10 +1161,10 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> @@ -1668,14 +1668,14 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -1746,7 +1746,7 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> @@ -1782,7 +1782,7 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> @@ -1812,14 +1812,14 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll copy from llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll copy to llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll copy from llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll copy to llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll @@ -20,9 +20,9 @@ ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = mul %T432 %v0, %v1 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmul.i32 + %r3 = shl %T432 %v0, %v1 +; COST: cost of 2 for instruction: {{.*}} shl <4 x i32> +; ASM: vshl.i32 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -37,9 +37,9 @@ %r1 = sext %T416 %v0 to %T432 %r2 = sext %T416 %v1 to %T432 ; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32> - %r3 = mul %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmull.s16 + %r3 = shl %T432 %r1, %r2 +; COST: cost of 2 for instruction: {{.*}} shl <4 x i32> +; ASM: vshll.s16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -54,9 +54,9 @@ %r1 = zext %T416 %v0 to %T432 %r2 = zext %T416 %v1 to %T432 ; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32> - %r3 = mul %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmull.u16 + %r3 = shl %T432 %r1, %r2 +; COST: cost of 2 for instruction: {{.*}} shl <4 x i32> +; ASM: vshll.u16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -68,9 +68,9 @@ ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = mul %T232 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 1 for instruction: {{.*}} mul <2 x i32> + %r3 = shl %T232 %v0, %v1 +; ASM: vshl.i32 +; COST: cost of 2 for instruction: {{.*}} shl <2 x i32> %st = sext %T232 %r3 to %T264 ; ASM: vmovl.s32 ; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64> @@ -85,9 +85,9 @@ ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = mul %T232 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 1 for instruction: {{.*}} mul <2 x i32> + %r3 = shl %T232 %v0, %v1 +; ASM: vshl.i32 +; COST: cost of 2 for instruction: {{.*}} shl <2 x i32> %st = zext %T232 %r3 to %T264 ; ASM: vmovl.u32 ; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64> @@ -102,9 +102,9 @@ ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = mul %T432 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> + %r3 = shl %T432 %v0, %v1 +; ASM: vshl.i32 +; COST: cost of 2 for instruction: {{.*}} shl <4 x i32> %st = trunc %T432 %r3 to %T416 ; ASM: vmovn.i32 ; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll rename from llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll rename to llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll @@ -20,9 +20,9 @@ ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = mul %T432 %v0, %v1 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmul.i32 + %r3 = sub %T432 %v0, %v1 +; COST: cost of 1 for instruction: {{.*}} sub <4 x i32> +; ASM: vsub.i32 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -37,9 +37,9 @@ %r1 = sext %T416 %v0 to %T432 %r2 = sext %T416 %v1 to %T432 ; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32> - %r3 = mul %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmull.s16 + %r3 = sub %T432 %r1, %r2 +; COST: cost of 1 for instruction: {{.*}} sub <4 x i32> +; ASM: vsubl.s16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -54,9 +54,9 @@ %r1 = zext %T416 %v0 to %T432 %r2 = zext %T416 %v1 to %T432 ; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32> - %r3 = mul %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> -; ASM: vmull.u16 + %r3 = sub %T432 %r1, %r2 +; COST: cost of 1 for instruction: {{.*}} sub <4 x i32> +; ASM: vsubl.u16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 ret void @@ -68,9 +68,9 @@ ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = mul %T232 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 1 for instruction: {{.*}} mul <2 x i32> + %r3 = sub %T232 %v0, %v1 +; ASM: vsub.i32 +; COST: cost of 1 for instruction: {{.*}} sub <2 x i32> %st = sext %T232 %r3 to %T264 ; ASM: vmovl.s32 ; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64> @@ -85,9 +85,9 @@ ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = mul %T232 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 1 for instruction: {{.*}} mul <2 x i32> + %r3 = sub %T232 %v0, %v1 +; ASM: vsub.i32 +; COST: cost of 1 for instruction: {{.*}} sub <2 x i32> %st = zext %T232 %r3 to %T264 ; ASM: vmovl.u32 ; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64> @@ -102,9 +102,9 @@ ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = mul %T432 %v0, %v1 -; ASM: vmul.i32 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> + %r3 = sub %T432 %v0, %v1 +; ASM: vsub.i32 +; COST: cost of 1 for instruction: {{.*}} sub <4 x i32> %st = trunc %T432 %r3 to %T416 ; ASM: vmovn.i32 ; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>