diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 | FileCheck %s
+
+; This test checks a regression in the vsetvli insertion pass.
+; Block .LBB0_4 has an illegal attempt to preserve VL in
+; "vsetvli zero,zero,e32,m1" when the last update of VL may be the vsetvli in block
+; .LBB0_2, "vsetvli zero,zero,e64,m2".
+
+define void @vector_loop(i32* %out, i64 %n, i64 %m) {
+; CHECK-LABEL: vector_loop:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a3, %hi(.LCPI0_0)
+; CHECK-NEXT:    addi a3, a3, %lo(.LCPI0_0)
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; CHECK-NEXT:    vle64.v v26, (a3)
+; CHECK-NEXT:    mv a6, zero
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    j .LBB0_2
+; CHECK-NEXT:  .LBB0_1: # %outer.latch
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    addi a6, a6, 1
+; CHECK-NEXT:    bgeu a6, a1, .LBB0_5
+; CHECK-NEXT:  .LBB0_2: # %outer.loop
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmsne.vx v28, v26, a6
+; CHECK-NEXT:    vpopc.m a4, v28
+; CHECK-NEXT:    beqz a4, .LBB0_1
+; CHECK-NEXT:  # %bb.3: # %inner.loop.preheader
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    mv a4, zero
+; CHECK-NEXT:    vmv1r.v v28, v25
+; CHECK-NEXT:  .LBB0_4: # %inner.loop
+; CHECK-NEXT:    # Parent Loop BB0_2 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.x.s a5, v28
+; CHECK-NEXT:    slli a3, a5, 2
+; CHECK-NEXT:    add a3, a0, a3
+; CHECK-NEXT:    sw a5, 0(a3)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    addi a4, a4, 1
+; CHECK-NEXT:    vadd.vi v28, v28, 1
+; CHECK-NEXT:    bltu a4, a2, .LBB0_4
+; CHECK-NEXT:    j .LBB0_1
+; CHECK-NEXT:  .LBB0_5: # %exit
+; CHECK-NEXT:    ret
+entry:
+  %index = tail call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  %neg.index = sub <4 x i64> zeroinitializer, %index
+  br label %outer.loop
+
+outer.loop:
+  %iv = phi i64 [ 0, %entry ], [ %inc, %outer.latch ]
+  %splatinsert = insertelement <4 x i64> poison, i64 %iv, i32 0
+  %splat = shufflevector <4 x i64> %splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
+  %c = icmp ne <4 x i64> %splat, %neg.index
+  %reduce = tail call i1 @llvm.vector.reduce.or.nxv4i1(<4 x i1> %c)
+  br i1 %reduce, label %inner.loop, label %outer.latch
+
+inner.loop:
+  %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.inc, %inner.loop ]
+  %vec.iv = phi <4 x i32> [ zeroinitializer, %outer.loop ], [ %vec.iv.inc, %inner.loop ]
+  %v30 = extractelement <4 x i32> %vec.iv, i64 0
+  %addr = getelementptr inbounds i32, i32* %out, i32 %v30
+  store i32 %v30, i32* %addr, align 4
+  %vec.iv.inc = add <4 x i32> %vec.iv, <i32 1, i32 1, i32 1, i32 1>
+  %inner.inc = add i64 %inner.iv, 1
+  %cmp = icmp ult i64 %inner.inc, %m
+  br i1 %cmp, label %inner.loop, label %outer.latch
+
+outer.latch:
+  %inc = add i64 %iv, 1
+  %atlimit = icmp ult i64 %inc, %n
+  br i1 %atlimit, label %outer.loop, label %exit
+
+exit:
+  ret void
+}
+
+declare i1 @llvm.vector.reduce.or.nxv4i1(<4 x i1>)
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()