diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18311,6 +18311,16 @@
   // we can convert that DUP into another extract_high (of a bigger DUP), which
   // helps the backend to decide that an sabdl2 would be useful, saving a real
   // extract_high operation.
+
+  // zext(shl(vscale i32, constant), i64)
+
+  // if (N->getOpcode() == ISD::ZERO_EXTEND &&
+  //     N->getOperand(0).getOpcode() == ISD::SHL &&
+  //     N->getOperand(0).getOperand(0).getOpcode() == ISD::VSCALE) {
+
+  //   return N->getOperand(0);
+  // }
+
   if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
       (N->getOperand(0).getOpcode() == ISD::ABDU ||
        N->getOperand(0).getOpcode() == ISD::ABDS)) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2059,6 +2059,17 @@
     { ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0 },
     { ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0 },
     { ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0 },
+
+    // Add cost for extending to illegal (too-wide) scalable vectors.
+    // Zero/sign extends are implemented by multiple unpack operations,
+    // where each operation has a cost of 2.
+    { ISD::ZERO_EXTEND, MVT::nxv16i16, MVT::nxv16i8, 6 },
+    { ISD::ZERO_EXTEND, MVT::nxv16i32, MVT::nxv16i8, 12 },
+    { ISD::ZERO_EXTEND, MVT::nxv16i64, MVT::nxv16i8, 28 },
+
+    { ISD::SIGN_EXTEND, MVT::nxv16i16, MVT::nxv16i8, 6 },
+    { ISD::SIGN_EXTEND, MVT::nxv16i32, MVT::nxv16i8, 12 },
+    { ISD::SIGN_EXTEND, MVT::nxv16i64, MVT::nxv16i8, 28 },
   };
 
   if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve_ext() {
+; CHECK-LABEL: 'sve_ext'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+  %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+  %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+
+  %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+  %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+  %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+
+  ret void
+}
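
For reference, a minimal hand-written sketch of the unpack expansion that the new cost-table entries model. This is not part of the patch: the function name and value names are illustrative only, and it assumes modern opaque-pointer IR. Widening <vscale x 16 x i8> by one element size takes one UUNPKLO/UUNPKHI pair; each further widening step (i8 to i32, i8 to i64) repeats that pair on every intermediate register, which is what the growing costs in the table approximate.

; Sketch only: zext <vscale x 16 x i8> -> <vscale x 16 x i16> written out with
; the SVE unpack intrinsics. UUNPKLO/UUNPKHI each take half of the .b elements
; and zero-extend them to .h; the sext entries correspond to the signed
; variants, @llvm.aarch64.sve.sunpklo / @llvm.aarch64.sve.sunpkhi.
define void @zext_nxv16i8_sketch(<vscale x 16 x i8> %in, ptr %out.lo, ptr %out.hi) {
  %lo = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %in)
  %hi = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %in)
  store <vscale x 8 x i16> %lo, ptr %out.lo
  store <vscale x 8 x i16> %hi, ptr %out.hi
  ret void
}

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>)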