diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -228,12 +228,39 @@ {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0}, {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0}, {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0}, + // The following extend from a legal type to an illegal type, so need to + // split the load. This introduced an extra load operation, but the + // extend is still "free". + {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1}, + {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1}, + {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 3}, + {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 3}, + {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1}, + {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1}, }; if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { if (const auto *Entry = ConvertCostTableLookup(MVELoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return AdjustCost(Entry->Cost); + return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor()); + } + } + + // The truncate of a store is free. This is the mirror of extends above. + if (I && I->hasOneUse() && isa(*I->user_begin())) { + static const TypeConversionCostTblEntry MVELoadConversionTbl[] = { + {ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0}, + {ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0}, + {ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0}, + {ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1}, + {ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3}, + {ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1}, + }; + if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { + if (const auto *Entry = + ConvertCostTableLookup(MVELoadConversionTbl, ISD, SrcTy.getSimpleVT(), + DstTy.getSimpleVT())) + return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor()); } } diff --git a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll --- a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll @@ -122,8 +122,8 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32> @@ -758,7 +758,7 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>