diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -23,7 +23,7 @@ cl::desc( "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used " "by autovectorized code. Fractional LMULs are not supported."), - cl::init(1), cl::Hidden); + cl::init(2), cl::Hidden); static cl::opt SLPMaxVF( "riscv-v-slp-max-vf", diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -9,25 +9,43 @@ ; DEFAULT-LABEL: @load_store( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; DEFAULT-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP2]] -; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 -; DEFAULT-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; DEFAULT-NEXT: store [[TMP5]], ptr [[TMP4]], align 4 -; DEFAULT-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; DEFAULT-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 +; DEFAULT-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 +; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] +; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP9]] +; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 +; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 +; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] +; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 4 +; DEFAULT-NEXT: [[TMP16:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; DEFAULT-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; DEFAULT-NEXT: store [[TMP16]], ptr [[TMP12]], align 4 +; DEFAULT-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP19]] +; DEFAULT-NEXT: store [[TMP17]], ptr [[TMP20]], align 4 +; DEFAULT-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]] +; DEFAULT-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]