diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -44,7 +44,7 @@
              "with zero meaning no minimum size is assumed. A value of -1 "
              "means use Zvl*b extension. This is primarily used to enable "
              "autovectorization with fixed width vectors."),
-    cl::init(0), cl::Hidden);
+    cl::init(-1), cl::Hidden);
 
 static cl::opt<unsigned> RVVVectorLMULMax(
     "riscv-v-fixed-length-vector-lmul-max",
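The only functional change in this patch is the new default above; everything below is test fallout. As a rough sketch of what the sentinel means (a hypothetical helper, not the actual RISCVSubtarget code; the name and parameters are illustrative):

#include <cassert>

// Illustrative only: resolve the riscv-v-vector-bits-min value against the
// minimum VLEN guaranteed by the target's Zvl*b extension.
unsigned resolveMinRVVVectorBits(int OptionValue, unsigned ZvlLen) {
  if (OptionValue == -1)
    return ZvlLen;              // new default: trust the Zvl*b guarantee
  if (OptionValue == 0)
    return 0;                   // old default: assume no minimum at all
  assert(unsigned(OptionValue) >= ZvlLen &&
         "explicit minimum should not contradict Zvl*b");
  return unsigned(OptionValue); // explicit user override
}

Since V implies Zvl128b, -mattr=+v now yields a 128-bit fixed-length minimum by default, so fixed-width autovectorization kicks in without the flag; that is what perturbs the three tests below.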
diff --git a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
--- a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
@@ -15,16 +15,16 @@
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv1i1_i32 = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
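The cost drops above (112/56/28/14 down to a flat 5, and 224 down to 10) reflect that the fixed-length masks are no longer scalarized lane by lane once a minimum VLEN is known. As a reminder of what is being costed, a scalar model of the intrinsic (my sketch of the LangRef semantics, not LLVM code; the widened add reflects the no-overflow requirement):

#include <cstdint>
#include <vector>

// Scalar model of llvm.get.active.lane.mask.*: lane i of the mask is
// ((base + i) < n), with the sum computed wide enough that it cannot wrap.
std::vector<bool> activeLaneMask(uint32_t base, uint32_t n, unsigned lanes) {
  std::vector<bool> mask(lanes);
  for (unsigned i = 0; i < lanes; ++i)
    mask[i] = (uint64_t)base + i < n; // widened add: no wraparound
  return mask;
}

The 32-lane mask costing exactly twice the 16-lane one is consistent with the operation being split across two vector registers at the 128-bit minimum VLEN.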
diff --git a/llvm/test/CodeGen/RISCV/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
--- a/llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
@@ -1,14 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=-v -o - %s | FileCheck %s
-; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=+v -o - %s | FileCheck %s
+; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=-v -o - %s | FileCheck --check-prefix=CHECK-NOV %s
+; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=+v -o - %s | FileCheck --check-prefix=CHECK-V %s
 ; Reproducer for https://github.com/llvm/llvm-project/issues/55168.
 ; We should always return 1 (and not -1).
 
 define i32 @test(i32 %call.i) {
-; CHECK-LABEL: test:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 1
-; CHECK-NEXT:    ret
+; CHECK-NOV-LABEL: test:
+; CHECK-NOV:       # %bb.0:
+; CHECK-NOV-NEXT:    li a0, 1
+; CHECK-NOV-NEXT:    ret
+;
+; CHECK-V-LABEL: test:
+; CHECK-V:       # %bb.0:
+; CHECK-V-NEXT:    lui a1, 524288
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-V-NEXT:    vmv.v.x v8, a1
+; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addiw a0, a1, 2
+; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-V-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-V-NEXT:    vmv.v.i v8, 0
+; CHECK-V-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; CHECK-V-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-V-NEXT:    vmv.x.s a0, v8
+; CHECK-V-NEXT:    ret
   %t2 = insertelement <2 x i32> <i32 poison, i32 -2147483648>, i32 %call.i, i64 0
   %t3 = icmp slt <2 x i32> %t2, <i32 -2147483646, i32 -2147483646>
   %t4 = zext <2 x i1> %t3 to <2 x i32>
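Splitting the prefixes is necessary because with -mattr=+v the compare is now kept in vector registers rather than folded to a constant, yet the function must still return 1. Reading the constants out of the CHECK-V assembly (lui a1, 524288 gives INT32_MIN; addiw a0, a1, 2 gives INT32_MIN + 2), lane 1 of the compare does not depend on %call.i. An illustrative scalar model of that lane (not part of the test):

#include <cstdint>
#include <cstdio>

// Lane 1 of the compare in the CHECK-V sequence: INT32_MIN is signed-less
// than INT32_MIN + 2, so the zext'd lane is always 1 and never -1 (the
// miscompile tracked in issue 55168 returned -1).
int main() {
  int32_t lane1 = INT32_MIN;      // constant lane of the source vector
  int32_t bound = INT32_MIN + 2;  // splatted comparison operand
  std::printf("%d\n", lane1 < bound ? 1 : 0); // prints 1
  return 0;
}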
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
--- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
@@ -101,17 +101,63 @@ define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) {
 ; CHECK-LABEL: @uniform_store_i1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i64, i64* [[START:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64*> poison, i64* [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT]], <2 x i64*> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64*> poison, i64* [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT3]], <2 x i64*> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i64* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <2 x i64> <i64 2, i64 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64*> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[TMP3]], i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP1]], i64 1
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <2 x i64*> [[TMP8]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq <2 x i64*> [[TMP9]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
+; CHECK-NEXT:    store i1 [[TMP12]], i1* [[DST:%.*]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
+; CHECK-NEXT:    store i1 [[TMP13]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT:    store i1 [[TMP14]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT:    store i1 [[TMP15]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i64, i64* [[POINTER_PHI]], i64 4
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[FIRST_SROA:%.*]] = phi i64* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[FIRST_SROA:%.*]] = phi i64* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[FIRST_SROA]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = load i64, i64* [[FIRST_SROA]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i64, i64* [[FIRST_SROA]], i64 1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64* [[INCDEC_PTR]], [[START]]
-; CHECK-NEXT:    store i1 [[CMP_NOT]], i1* [[DST:%.*]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]], !llvm.loop [[LOOP0]]
+; CHECK-NEXT:    store i1 [[CMP_NOT]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void
 ;
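For reference, a C++ rendition of the @uniform_store_i1 loop that now vectorizes (reconstructed from the scalar IR above; parameter types are illustrative). The new CHECK lines show VF 2 with interleave 2: the loop-uniform i1 store is scalarized into four per-lane stores, and the pointer induction and index both step by 4 per vector iteration.

#include <cstdint>

// Scalar shape of @uniform_store_i1: trip count is N + 1, the loaded value
// feeds nothing, and the store of the pointer compare is loop-uniform.
void uniform_store_i1(bool *dst, int64_t *start, uint64_t n) {
  int64_t *p = start;
  for (uint64_t iv = 0; ; ++iv) {
    (void)*p;            // becomes the wide loads in the vector body
    ++p;                 // incdec.ptr
    *dst = (p == start); // uniform store, scalarized per lane when vectorized
    if (iv >= n)         // br i1 (icmp ult iv, N)
      break;
  }
}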