Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2270,7 +2270,7 @@ if (cast(Src)->getElementCount() == ElementCount::getScalable(1)) return InstructionCost::getInvalid(); - return LT.first * 2; + return LT.first; } static unsigned getSVEGatherScatterOverhead(unsigned Opcode) { Index: llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll +++ llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll @@ -56,25 +56,25 @@ define void @scalable() { ; CHECK-LABEL: 'scalable' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8 = call @llvm.masked.load.nxv2i8.p0nxv2i8(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8 = call @llvm.masked.load.nxv4i8.p0nxv4i8(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8 = call @llvm.masked.load.nxv8i8.p0nxv8i8(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = call @llvm.masked.load.nxv16i8.p0nxv16i8(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16 = call @llvm.masked.load.nxv2i16.p0nxv2i16(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16 = call @llvm.masked.load.nxv4i16.p0nxv4i16(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = call @llvm.masked.load.nxv8i16.p0nxv8i16(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32 = call @llvm.masked.load.nxv2i32.p0nxv2i32(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = call @llvm.masked.load.nxv4i32.p0nxv4i32(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = call @llvm.masked.load.nxv2i64.p0nxv2i64(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16 = call @llvm.masked.load.nxv2f16.p0nxv2f16(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16 = call @llvm.masked.load.nxv4f16.p0nxv4f16(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16 = call @llvm.masked.load.nxv8f16.p0nxv8f16(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32 = call @llvm.masked.load.nxv2f32.p0nxv2f32(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32 = call @llvm.masked.load.nxv4f32.p0nxv4f32(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64 = call @llvm.masked.load.nxv2f64.p0nxv2f64(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = call @llvm.masked.load.nxv2i8.p0nxv2i8(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = call @llvm.masked.load.nxv4i8.p0nxv4i8(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call @llvm.masked.load.nxv8i8.p0nxv8i8(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call @llvm.masked.load.nxv16i8.p0nxv16i8(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = call @llvm.masked.load.nxv2i16.p0nxv2i16(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call @llvm.masked.load.nxv4i16.p0nxv4i16(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call @llvm.masked.load.nxv8i16.p0nxv8i16(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call @llvm.masked.load.nxv2i32.p0nxv2i32(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call @llvm.masked.load.nxv4i32.p0nxv4i32(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call @llvm.masked.load.nxv2i64.p0nxv2i64(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16 = call @llvm.masked.load.nxv2f16.p0nxv2f16(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call @llvm.masked.load.nxv4f16.p0nxv4f16(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call @llvm.masked.load.nxv8f16.p0nxv8f16(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call @llvm.masked.load.nxv2f32.p0nxv2f32(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call @llvm.masked.load.nxv4f32.p0nxv4f32(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call @llvm.masked.load.nxv2f64.p0nxv2f64(* undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call @llvm.masked.load.nxv1i64.p0nxv1i64(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = call @llvm.masked.load.nxv4i64.p0nxv4i64(* undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32f16 = call @llvm.masked.load.nxv32f16.p0nxv32f16(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call @llvm.masked.load.nxv4i64.p0nxv4i64(* undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32f16 = call @llvm.masked.load.nxv32f16.p0nxv32f16(* undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; entry: Index: llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll +++ llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll @@ -19,13 +19,13 @@ define void @fixed_sve_vls() #0 { ; CHECK-LABEL: 'fixed_sve_vls' -; CHECK: Cost Model: Found an estimated cost of [[#mul(div(2047,VBITS)+1,2)]] for instruction: %v256i8 = call <256 x i8> @llvm.masked.load.v256i8.p0v256i8(<256 x i8>* undef, i32 8, <256 x i1> undef, <256 x i8> undef) -; CHECK: Cost Model: Found an estimated cost of [[#mul(div(4091,VBITS)+1,2)]] for instruction: %v256i16 = call <256 x i16> @llvm.masked.load.v256i16.p0v256i16(<256 x i16>* undef, i32 8, <256 x i1> undef, <256 x i16> undef) -; CHECK: Cost Model: Found an estimated cost of [[#mul(div(511,VBITS)+1,2)]] for instruction: %v16i32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 8, <16 x i1> undef, <16 x i32> undef) -; CHECK: Cost Model: Found an estimated cost of [[#mul(div(1023,VBITS)+1,2)]] for instruction: %v16i64 = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* undef, i32 8, <16 x i1> undef, <16 x i64> undef) -; CHECK: Cost Model: Found an estimated cost of [[#mul(div(8191,VBITS)+1,2)]] for instruction: %v512f16 = call <512 x half> @llvm.masked.load.v512f16.p0v512f16(<512 x half>* undef, i32 8, <512 x i1> undef, <512 x half> undef) -; CHECK: Cost Model: Found an estimated cost of [[#mul(div(8191,VBITS)+1,2)]] for instruction: %v256f32 = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* undef, i32 8, <256 x i1> undef, <256 x float> undef) -; CHECK: Cost Model: Found an estimated cost of [[#mul(div(8191,VBITS)+1,2)]] for instruction: %v128f64 = call <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* undef, i32 8, <128 x i1> undef, <128 x double> undef) +; CHECK: Cost Model: Found an estimated cost of [[#div(2047,VBITS)+1]] for instruction: %v256i8 = call <256 x i8> @llvm.masked.load.v256i8.p0v256i8(<256 x i8>* undef, i32 8, <256 x i1> undef, <256 x i8> undef) +; CHECK: Cost Model: Found an estimated cost of [[#div(4091,VBITS)+1]] for instruction: %v256i16 = call <256 x i16> @llvm.masked.load.v256i16.p0v256i16(<256 x i16>* undef, i32 8, <256 x i1> undef, <256 x i16> undef) +; CHECK: Cost Model: Found an estimated cost of [[#div(511,VBITS)+1]] for instruction: %v16i32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 8, <16 x i1> undef, <16 x i32> undef) +; CHECK: Cost Model: Found an estimated cost of [[#div(1023,VBITS)+1]] for instruction: %v16i64 = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* undef, i32 8, <16 x i1> undef, <16 x i64> undef) +; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v512f16 = call <512 x half> @llvm.masked.load.v512f16.p0v512f16(<512 x half>* undef, i32 8, <512 x i1> undef, <512 x half> undef) +; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v256f32 = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* undef, i32 8, <256 x i1> undef, <256 x float> undef) +; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v128f64 = call <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* undef, i32 8, <128 x i1> undef, <128 x double> undef) ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: ret void entry: %v256i8 = call <256 x i8> @llvm.masked.load.v256i8.p0v256i8(<256 x i8> *undef, i32 8, <256 x i1> undef, <256 x i8> undef) Index: llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll @@ -46,7 +46,7 @@ ; CHECK-COST: Checking a loop in 'scalable' -; CHECK-COST: Found an estimated cost of 2 for VF vscale x 4 For instruction: store i32 2, i32* %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction: store i32 2, i32* %arrayidx1, align 4 define void @scalable(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @scalable(