diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2145,6 +2145,13 @@ FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) return AdjustCost(Entry->Cost); + // The BasicTTIImpl version only deals with CCH==TTI::CastContextHint::Normal, + // but we also want to include the TTI::CastContextHint::Masked case too. + if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND) && + CCH == TTI::CastContextHint::Masked && ST->hasSVEorSME() && + TLI->isTypeLegal(DstTy)) + CCH = TTI::CastContextHint::Normal; + return AdjustCost( BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); } diff --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll --- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll @@ -116,23 +116,23 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv16i8.3 = call @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %zext.nxv16i8to64 = zext %load.nxv16i8.3 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i8 = call @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext.nxv8i8to16 = zext %load.nxv8i8 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv8i8to16 = zext %load.nxv8i8 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i8 = call @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext.nxv4i8to32 = zext %load.nxv4i8 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv4i8to32 = zext %load.nxv4i8 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i8 = call @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext.nxv2i8to64 = zext %load.nxv2i8 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i8to64 = zext %load.nxv2i8 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i16 = call @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv8i16to32 = zext %load.nxv8i16 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i16.2 = call @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %zext.nxv8i16to64 = zext %load.nxv8i16.2 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i16 = call @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext.nxv4i16to32 = zext %load.nxv4i16 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv4i16to32 = zext %load.nxv4i16 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i16 = call @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext.nxv2i16to64 = zext %load.nxv2i16 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i16to64 = zext %load.nxv2i16 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i32 = call @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv4i32to64 = zext %load.nxv4i32 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i32 = call @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext.nxv2i32to64 = zext %load.nxv2i32 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i32to64 = zext %load.nxv2i32 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv16i8 = call @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv16i8to16 = sext %load2.nxv16i8 to @@ -141,23 +141,23 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv16i8.3 = call @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext.nxv16i8to64 = sext %load2.nxv16i8.3 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i8 = call @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext.nxv8i8to16 = sext %load2.nxv8i8 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv8i8to16 = sext %load2.nxv8i8 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i8 = call @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext.nxv4i8to32 = sext %load2.nxv4i8 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv4i8to32 = sext %load2.nxv4i8 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i8 = call @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext.nxv2i8to64 = sext %load2.nxv2i8 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i8to64 = sext %load2.nxv2i8 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i16 = call @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv8i16to32 = sext %load2.nxv8i16 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i16.2 = call @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext.nxv8i16to64 = sext %load2.nxv8i16.2 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i16 = call @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext.nxv4i16to32 = sext %load2.nxv4i16 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv4i16to32 = sext %load2.nxv4i16 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i16 = call @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext.nxv2i16to64 = sext %load2.nxv2i16 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i16to64 = sext %load2.nxv2i16 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i32 = call @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv4i32to64 = sext %load2.nxv4i32 to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i32 = call @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext.nxv2i32to64 = sext %load2.nxv2i32 to +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i32to64 = sext %load2.nxv2i32 to %load.nxv16i8 = call @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, undef, undef) %zext.nxv16i8to16 = zext %load.nxv16i8 to