Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11015,10 +11015,11 @@
 TargetLoweringBase::LegalizeTypeAction
 AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
   MVT SVT = VT.getSimpleVT();
-  // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
-  // v4i16, v2i32 instead of to promote.
-  if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
-      || SVT == MVT::v1f32)
+  // During type legalization, we prefer to widen v1i8, v2i8, v4i8, v1i16,
+  // v2i16, v1i32, v1f32 to v8i8, v4i16, v2i32, v2f32 instead of to promote.
+  if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32 ||
+      SVT == MVT::v1f32 || SVT == MVT::v2i8 || SVT == MVT::v4i8 ||
+      SVT == MVT::v2i16)
     return TypeWidenVector;
 
   return TargetLoweringBase::getPreferredVectorAction(VT);
Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -97,7 +97,7 @@
   bool NegativeImmediates = true;
 
-  // Enable 64-bit vectorization in SLP.
-  unsigned MinVectorRegisterBitWidth = 64;
+  // Allow SLP to vectorize into sub-64-bit vectors (as narrow as 16 bits).
+  unsigned MinVectorRegisterBitWidth = 16;
 
   bool UseAA = false;
   bool PredictableSelectIsExpensive = false;
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -612,16 +612,6 @@
     return LT.first * 2 * AmortizationCost;
   }
 
-  if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8) &&
-      Ty->getVectorNumElements() < 8) {
-    // We scalarize the loads/stores because there is not v.4b register and we
-    // have to promote the elements to v.4h.
-    unsigned NumVecElts = Ty->getVectorNumElements();
-    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
-    // We generate 2 instructions per vector element.
-    return NumVectorizableInstsToAmortize * NumVecElts * 2;
-  }
-
   return LT.first;
 }
 