diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -56,6 +56,7 @@
   bool shouldExpandReduction(const IntrinsicInst *II) const;
   bool supportsScalableVectors() const { return ST->hasVInstructions(); }
+  bool enableScalableVectorization() const { return ST->hasVInstructions(); }
   Optional<unsigned> getMaxVScale() const;
 
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -riscv-v-vector-bits-min=128 -scalable-vectorization=on -force-target-instruction-cost=1 -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -riscv-v-vector-bits-min=128 -force-target-instruction-cost=1 -S < %s | FileCheck %s
 
 target triple = "riscv64"
 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -scalable-vectorization=on \
+; RUN: opt < %s -loop-vectorize \
 ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-vector-bits-max=128 \
 ; RUN: -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize \
 ; RUN: -pass-remarks-missed=loop-vectorize -mtriple riscv64-linux-gnu \
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
@@ -1,6 +1,5 @@
 ; RUN: opt -mtriple=riscv64 -mattr=+m,+v -loop-vectorize \
-; RUN: -riscv-v-vector-bits-max=512 -S -scalable-vectorization=on < %s 2>&1 \
-; RUN: | FileCheck %s
+; RUN: -riscv-v-vector-bits-max=512 -S < %s 2>&1 | FileCheck %s
 
 ; void test(int *a, int *b, int N) {
 ;   #pragma clang loop vectorize(enable) vectorize_width(2, scalable)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
--- a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
@@ -9,16 +9,63 @@
 ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
 ; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK: loop.preheader:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE]], [[TMP2]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SIZE]], [[TMP4]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SIZE]], [[N_MOD_VF]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[INARRAY:%.*]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[INARRAY]], i32 [[TMP10]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <vscale x 2 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP14]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 2
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <vscale x 2 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP18]], align 4
+; CHECK-NEXT: [[TMP19:%.*]] = add nsw <vscale x 2 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 6, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP20:%.*]] = add nsw <vscale x 2 x i32> [[WIDE_LOAD1]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 6, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP13]] to <vscale x 2 x i32>*
+; CHECK-NEXT: store <vscale x 2 x i32> [[TMP19]], <vscale x 2 x i32>* [[TMP21]], align 4
+; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 2
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 [[TMP23]]
+; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <vscale x 2 x i32>*
+; CHECK-NEXT: store <vscale x 2 x i32> [[TMP20]], <vscale x 2 x i32>* [[TMP25]], align 4
+; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP27:%.*]] = mul i32 [[TMP26]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP27]]
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV1:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[INARRAY:%.*]], i32 [[IV]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], 6
-; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP1]], align 4
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV1:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[INARRAY]], i32 [[IV]]
+; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+; CHECK-NEXT: [[TMP31:%.*]] = add nsw i32 [[TMP30]], 6
+; CHECK-NEXT: store i32 [[TMP31]], i32* [[TMP29]], align 4
 ; CHECK-NEXT: [[IV1]] = add i32 [[IV]], 1
 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV1]], [[SIZE]]
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK: exit.loopexit:
 ; CHECK-NEXT: br label [[EXIT]]
 ; CHECK: exit:
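Note on why the `-scalable-vectorization=on` flags can simply be dropped from the RUN lines above: once a target overrides the `enableScalableVectorization()` TTI hook, the vectorizer's unspecified default resolves to the target's preference, and with this patch RISC-V answers true whenever the V extension is present. Below is a minimal sketch of that dispatch, assuming the upstream `LoopVectorizeHints::ScalableForceKind` states; the helper name `preferScalable` is hypothetical, not verbatim LoopVectorize.cpp code.

```cpp
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"

using namespace llvm;

// Illustrative only: condensed view of how the scalable-VF default is
// chosen. The ScalableForceKind enum and the TTI hook exist upstream;
// preferScalable itself is a made-up helper for this sketch.
static bool preferScalable(const TargetTransformInfo &TTI,
                           LoopVectorizeHints::ScalableForceKind Forced) {
  switch (Forced) {
  case LoopVectorizeHints::SK_FixedWidthOnly: // -scalable-vectorization=off
    return false;
  case LoopVectorizeHints::SK_PreferScalable: // -scalable-vectorization=on
    return true;
  case LoopVectorizeHints::SK_Unspecified:
    // The new default path: defer to the target. After this patch,
    // RISCVTTIImpl returns ST->hasVInstructions() here, so building
    // with +v yields scalable VFs without any extra flag.
    return TTI.enableScalableVectorization();
  }
  llvm_unreachable("covered switch");
}
```

This also mirrors the split visible in the header hunk: `supportsScalableVectors()` says the target can legally handle scalable vectors, while the new `enableScalableVectorization()` expresses that the vectorizer should choose them by default.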