Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -30,6 +30,12 @@ static cl::opt EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", cl::init(true), cl::Hidden); +static cl::opt SVEGatherOverhead("sve-gather-overhead", cl::init(10), + cl::Hidden); + +static cl::opt SVEScatterOverhead("sve-scatter-overhead", + cl::init(10), cl::Hidden); + bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { const TargetMachine &TM = getTLI()->getTargetMachine(); @@ -1777,6 +1783,10 @@ return LT.first * 2; } +static unsigned getSVEGatherScatterOverhead(unsigned Opcode) { + return Opcode == Instruction::Load ? SVEGatherOverhead : SVEScatterOverhead; +} + InstructionCost AArch64TTIImpl::getGatherScatterOpCost( unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) { @@ -1799,6 +1809,10 @@ ElementCount LegalVF = LT.second.getVectorElementCount(); InstructionCost MemOpCost = getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I); + // Add on an overhead cost for using gathers/scatters. + // TODO: At the moment this is applied unilaterally for all CPUs, but at some + // point we may want a per-CPU overhead. + MemOpCost *= getSVEGatherScatterOverhead(Opcode); return LT.first * MemOpCost * getMaxNumElements(LegalVF); } Index: llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll +++ llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll @@ -92,8 +92,8 @@ ; CHECK: gather_load_4xi8_constant_mask ; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 -; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 -; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 +; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 +; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 ; %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) ret <4 x i8> %lv @@ -103,8 +103,8 @@ ; CHECK: gather_load_4xi8_variable_mask ; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 -; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 -; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 +; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 +; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 ; %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef) ret <4 x i8> %lv @@ -115,8 +115,8 @@ ; CHECK: scatter_store_4xi8_constant_mask ; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( -; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( -; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( +; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( +; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( ; call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> ) ret void @@ -126,8 +126,8 @@ ; CHECK: scatter_store_4xi8_variable_mask ; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( -; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( -; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( +; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( +; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8( ; call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %cond) ret void @@ -138,8 +138,8 @@ ; CHECK: gather_load_4xi32_constant_mask ; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 -; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 -; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 +; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 +; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 ; %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) ret <4 x i32> %lv @@ -149,8 +149,8 @@ ; CHECK: gather_load_4xi32_variable_mask ; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 -; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 -; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 +; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 +; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 ; %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef) ret <4 x i32> %lv @@ -161,8 +161,8 @@ ; CHECK: scatter_store_4xi32_constant_mask ; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( -; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( -; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( +; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( +; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( ; call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> ) ret void @@ -172,8 +172,8 @@ ; CHECK: scatter_store_4xi32_variable_mask ; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( -; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( -; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( +; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( +; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32( ; call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> %cond) ret void @@ -184,8 +184,8 @@ ; CHECK-LABEL: 'sve_scatter_vls' ; CHECK-NEON: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) ; CHECK-SVE-128: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) -; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) -; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) +; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) +; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) entry: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) ret void @@ -196,8 +196,8 @@ ; CHECK-LABEL: 'sve_gather_vls_float' ; CHECK-NEON: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) ; CHECK-SVE-128: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) -; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) -; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) +; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) +; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) entry: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) ret void @@ -208,8 +208,8 @@ ; CHECK-LABEL: 'sve_scatter_vls' ; CHECK-NEON: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask) ; CHECK-SVE-128: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask) -; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask) -; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask) +; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask) +; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask) entry: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask) ret void @@ -220,8 +220,8 @@ ; CHECK-LABEL: 'sve_scatter_vls_float' ; CHECK-NEON: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask) ; CHECK-SVE-128: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask) -; CHECK-SVE-256: Cost Model: Found an estimated cost of 512 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask) -; CHECK-SVE-512: Cost Model: Found an estimated cost of 512 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask) +; CHECK-SVE-256: Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask) +; CHECK-SVE-512: Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask) call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask) ret void -} \ No newline at end of file +} Index: llvm/test/Analysis/CostModel/AArch64/sve-gather.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/sve-gather.ll +++ llvm/test/Analysis/CostModel/AArch64/sve-gather.ll @@ -8,20 +8,20 @@ define void @masked_gathers( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) #0 { ; CHECK-LABEL: 'masked_gathers' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-2-LABEL: 'masked_gathers' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) ; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-1-LABEL: 'masked_gathers' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) ; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -33,20 +33,20 @@ define void @masked_gathers_tune_generic( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) #1 { ; CHECK-LABEL: 'masked_gathers_tune_generic' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-2-LABEL: 'masked_gathers_tune_generic' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) ; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-1-LABEL: 'masked_gathers_tune_generic' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) ; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -58,36 +58,36 @@ define void @masked_gathers_no_vscale_range() #2 { ; CHECK-LABEL: 'masked_gathers_no_vscale_range' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8f32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 1, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4f32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 1, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv2f32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 1, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res.nxv16i16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8f32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %res.nxv16i16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-2-LABEL: 'masked_gathers_no_vscale_range' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8f32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 1, undef, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4f32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 1, undef, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv2f32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 1, undef, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res.nxv16i16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8f32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 1, undef, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 1, undef, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 1, undef, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %res.nxv16i16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-1-LABEL: 'masked_gathers_no_vscale_range' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv8f32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 1, undef, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv4f32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 1, undef, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res.nxv2f32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 1, undef, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv16i16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8f32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 1, undef, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4f32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 1, undef, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res.nxv2f32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 1, undef, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv16i16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %res.nxv4f64 = call @llvm.masked.gather.nxv4f64( undef, i32 1, undef, undef) Index: llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll +++ llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll @@ -8,20 +8,20 @@ define void @masked_scatters( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) #0 { ; CHECK-LABEL: 'masked_scatters' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-2-LABEL: 'masked_scatters' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) ; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-1-LABEL: 'masked_scatters' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) ; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -33,20 +33,20 @@ define void @masked_scatters_tune_generic( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) #1 { ; CHECK-LABEL: 'masked_scatters_tune_generic' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-2-LABEL: 'masked_scatters_tune_generic' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) ; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-1-LABEL: 'masked_scatters_tune_generic' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) ; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -58,36 +58,36 @@ define void @masked_scatters_no_vscale_range() #2 { ; CHECK-LABEL: 'masked_scatters_no_vscale_range' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 1, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 1, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 1, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 1, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-2-LABEL: 'masked_scatters_no_vscale_range' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 1, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 1, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 1, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 1, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 1, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 1, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 1, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 1, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-1-LABEL: 'masked_scatters_no_vscale_range' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 1, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 1, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 1, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 1, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 1, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 1, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 1, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 1, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call void @llvm.masked.scatter.nxv4f64( undef, undef, i32 1, undef) Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll @@ -4,7 +4,7 @@ target triple="aarch64--linux-gnu" ; CHECK: LV: Checking a loop in "gather_nxv4i32_loaded_index" -; CHECK: LV: Found an estimated cost of 9 for VF vscale x 4 For instruction: %1 = load float, float* %arrayidx3, align 4 +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, float* %arrayidx3, align 4 define void @gather_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { entry: br label %for.body @@ -26,7 +26,7 @@ } ; CHECK: LV: Checking a loop in "scatter_nxv4i32_loaded_index" -; CHECK: LV: Found an estimated cost of 9 for VF vscale x 4 For instruction: store float %1, float* %arrayidx5, align 4 +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, float* %arrayidx5, align 4 define void @scatter_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { entry: br label %for.body @@ -94,7 +94,7 @@ } ; CHECK: LV: Checking a loop in "gather_nxv4i32_stride2" -; CHECK: LV: Found an estimated cost of 9 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 define void @gather_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body @@ -115,7 +115,7 @@ } ; CHECK: LV: Checking a loop in "scatter_nxv4i32_stride2" -; CHECK: LV: Found an estimated cost of 9 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 define void @scatter_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body @@ -137,7 +137,7 @@ ; CHECK: LV: Checking a loop in "gather_nxv4i32_stride64" -; CHECK: LV: Found an estimated cost of 9 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 define void @gather_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body @@ -158,7 +158,7 @@ } ; CHECK: LV: Checking a loop in "scatter_nxv4i32_stride64" -; CHECK: LV: Found an estimated cost of 9 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 define void @scatter_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body