diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1275,8 +1275,17 @@ InstructionCost Cost = 0; if (Opcode == Instruction::Store && OpInfo.isConstant()) Cost += getStoreImmCost(Src, OpInfo, CostKind); - return Cost + BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind, OpInfo, I); + InstructionCost BaseCost = + BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, OpInfo, I); + // Assume memory op costs scale with the number of vector registers + // possibly accessed by the instruction. Note that BasicTTI already + // handles the LT.first term for us. + if (std::pair LT = getTypeLegalizationCost(Src); + LT.second.isVector()) + BaseCost *= getLMULCost(LT.second); + return Cost + BaseCost; + } InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, diff --git a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll --- a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll @@ -19,7 +19,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -56,21 +56,21 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = call @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = call @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = call @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = call @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = call @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = 
call @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call @llvm.masked.load.nxv2i64.p0(ptr undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = call @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = call @llvm.masked.load.nxv2i64.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2f16 = call @llvm.masked.load.nxv2f16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call @llvm.masked.load.nxv4f16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call @llvm.masked.load.nxv8f16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call @llvm.masked.load.nxv2f32.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call @llvm.masked.load.nxv4f32.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call @llvm.masked.load.nxv2f64.p0(ptr undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32 = call @llvm.masked.load.nxv4f32.p0(ptr undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64 = call @llvm.masked.load.nxv2f64.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call @llvm.masked.load.nxv1i64.p0(ptr undef, i32 8, undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = call @llvm.masked.load.nxv4i64.p0(ptr undef, i32 8, undef, undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %nxv4i64 = call @llvm.masked.load.nxv4i64.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv32f16 = call @llvm.masked.load.nxv32f16.p0(ptr undef, i32 8, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll --- a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll @@ -16,8 +16,8 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load , ptr %p, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load , ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load , ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load , ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load , ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load , ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load , ptr %p, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load i16, ptr %p, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <1 x i16>, ptr %p, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i16>, ptr %p, align 4 @@ -28,9 +28,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load , ptr %p, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load , ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load , ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %24 = load , ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load , ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load , ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = load , ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = load , ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %26 = load , ptr %p, align 64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load i32, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <1 x i32>, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <2 x i32>, ptr %p, align 8 @@ -40,10 +40,10 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %33 = load <32 x i32>, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load , ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load , ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load , ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load , ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load , ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = load , ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %36 = load , ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load , ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %38 = load , ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 16 for instruction: %39 = load , ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load i64, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <1 x i64>, ptr %p, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <2 x i64>, ptr %p, align 16 @@ -52,11 +52,11 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %45 = load <16 x i64>, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %46 = load <32 x i64>, ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load , ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load , ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = load , ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %50 = load , ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %51 = load , ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %52 = load , ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %48 = load , ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %49 = load , ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %50 = load , ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %51 = load , ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %52 = load , ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %53 = load ptr, ptr %p, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %54 = load <1 x ptr>, ptr %p, align 8 ; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %55 = load <2 x ptr>, ptr %p, align 16 @@ -65,11 +65,11 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %58 = load <16 x ptr>, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %59 = load <32 x ptr>, ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %60 = load , ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = load , ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %62 = load , ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %63 = load , ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %64 = load , ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %65 = load , ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %61 = load , ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load , ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %63 = load , ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %64 = load , ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %65 = load , ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; load i8, ptr %p @@ -159,8 +159,8 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 
16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store undef, ptr %p, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr %p, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr %p, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr %p, align 4 @@ -171,9 +171,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store undef, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store undef, ptr %p, align 64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr %p, align 8 @@ -183,10 +183,10 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: store <32 x i32> undef, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store undef, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store undef, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store undef, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr %p, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr %p, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr %p, align 16 @@ -195,11 +195,11 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> undef, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i64> undef, ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 32 -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: store undef, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store undef, ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store undef, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store undef, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store undef, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store undef, ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr %p, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x ptr> undef, ptr %p, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr %p, align 16 @@ -208,11 +208,11 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x ptr> undef, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x ptr> undef, ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store undef, ptr %p, align 256 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store undef, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store undef, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store undef, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store undef, ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; store i8 undef, ptr %p diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll @@ -24,10 +24,10 @@ ; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a0, ptr %p0, align 1 ; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %a1, ptr %p1, align 1 ; VF_16-LABEL: Checking a loop in 'i8_factor_2' -; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %l0 = load i8, ptr %p0, align 1 +; VF_16: Found an estimated cost of 3 for VF 16 For instruction: %l0 = load i8, ptr %p0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %l1 = load i8, ptr %p1, align 1 ; VF_16: Found an estimated cost of 0 for VF 16 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 %a1, ptr %p1, align 1 +; VF_16-NEXT: Found an estimated cost of 3 for VF 16 For instruction: store i8 %a1, ptr %p1, align 1 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 @@ -65,12 +65,12 @@ ; VF_4: Found an estimated cost of 0 for VF 4 For instruction: store i8 %a1, ptr %p1, align 1 ; 
VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i8 %a2, ptr %p2, align 1 ; VF_8-LABEL: Checking a loop in 'i8_factor_3' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %l0 = load i8, ptr %p0, align 1 +; VF_8: Found an estimated cost of 3 for VF 8 For instruction: %l0 = load i8, ptr %p0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %l1 = load i8, ptr %p1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %l2 = load i8, ptr %p2, align 1 ; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a0, ptr %p0, align 1 ; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a1, ptr %p1, align 1 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %a2, ptr %p2, align 1 +; VF_8-NEXT: Found an estimated cost of 3 for VF 8 For instruction: store i8 %a2, ptr %p2, align 1 ; VF_16-LABEL: Checking a loop in 'i8_factor_3' ; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %l0 = load i8, ptr %p0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %l1 = load i8, ptr %p1, align 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -36,10 +36,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 7 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an 
estimated cost of 8 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 7 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -79,10 +79,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 7 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 7 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 
+; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -105,7 +105,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class -; CHECK-NEXT: LV: Loop cost is 23 +; CHECK-NEXT: LV: Loop cost is 25 ; CHECK-NEXT: LV: IC is 1 ; CHECK-NEXT: LV: VF is vscale x 4 ; CHECK-NEXT: LV: Not Interleaving. @@ -168,10 +168,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 7 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 7 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an 
estimated cost of 8 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -211,10 +211,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 7 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 7 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 
%cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -237,7 +237,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class -; CHECK-NEXT: LV: Loop cost is 23 +; CHECK-NEXT: LV: Loop cost is 25 ; CHECK-NEXT: LV: IC is 1 ; CHECK-NEXT: LV: VF is vscale x 4 ; CHECK-NEXT: LV: Not Interleaving. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -5,28 +5,45 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-LABEL: @single_constant_stride_int_scaled( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], <8 x i64> [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <64 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i32> [[WIDE_VEC]], <64 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP4]], <8 x ptr> [[TMP1]], i32 4, <8 x i1> ) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1016 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP12]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), 
poison) +; CHECK-NEXT: [[TMP14:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP14]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1016, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -141,33 +158,26 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 8064 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 8128 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <8 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <8 x i64> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x ptr> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = 
getelementptr i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x ptr> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC3]], -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP6]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP7]], <8 x ptr> [[TMP1]], i32 4, <8 x i1> ) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 128 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1008 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP3]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> ) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 64 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1016 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ 1008, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1016, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll @@ -19,16 +19,13 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[TMP3]], <4 x ptr> [[TMP0]], i32 2, <4 x i1> ) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> [[TMP0]], i32 2, <4 x i1> , <4 x i16> poison) +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[TMP1]], <4 x ptr> [[TMP0]], i32 2, <4 x i1> ) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: