diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5285,7 +5285,6 @@ unsigned VF = VecTy->getNumElements() / Factor; MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); - // FIXME: this is the most conservative estimate for the mask cost. InstructionCost MaskCost; if (UseMaskForCond || UseMaskForGaps) { APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements()); @@ -5295,10 +5294,10 @@ DemandedLoadStoreElts.setBit(Index + Elm * Factor); } - Type *I8Type = Type::getInt8Ty(VecTy->getContext()); + Type *I1Type = Type::getInt1Ty(VecTy->getContext()); MaskCost = getReplicationShuffleCost( - I8Type, Factor, VF, + I1Type, Factor, VF, UseMaskForGaps ? DemandedLoadStoreElts : APInt::getAllOnes(VecTy->getNumElements()), CostKind); @@ -5309,7 +5308,7 @@ // memory access, we need to account for the cost of And-ing the two masks // inside the loop. if (UseMaskForGaps) { - auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements()); + auto *MaskVT = FixedVectorType::get(I1Type, VecTy->getNumElements()); MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind); } } diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll @@ -43,13 +43,13 @@ ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { entry: @@ -110,13 +110,13 @@ ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { entry: